Discussion:
[libvirt] [PATCH 1/9] Rename virResctrlInfo to virResctrlInfoPerCache
(too old to reply)
Martin Kletzander
2017-12-13 15:39:23 UTC
Permalink
Just to ease the review of following patches.

Signed-off-by: Martin Kletzander <***@redhat.com>
---
src/conf/capabilities.c | 2 +-
src/conf/capabilities.h | 2 +-
src/util/virresctrl.c | 4 ++--
src/util/virresctrl.h | 8 ++++----
4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/conf/capabilities.c b/src/conf/capabilities.c
index 798c9bdaeaf3..e93eaed2f0e0 100644
--- a/src/conf/capabilities.c
+++ b/src/conf/capabilities.c
@@ -904,7 +904,7 @@ virCapabilitiesFormatCaches(virBufferPtr buf,
virBufferSetChildIndent(&controlBuf, buf);
for (j = 0; j < bank->ncontrols; j++) {
const char *min_unit;
- virResctrlInfoPtr controls = bank->controls[j];
+ virResctrlInfoPerCachePtr controls = bank->controls[j];
unsigned long long gran_short_size = controls->granularity;
unsigned long long min_short_size = controls->min;

diff --git a/src/conf/capabilities.h b/src/conf/capabilities.h
index 5048fa819d95..27b88cb5edfa 100644
--- a/src/conf/capabilities.h
+++ b/src/conf/capabilities.h
@@ -148,7 +148,7 @@ struct _virCapsHostCacheBank {
virCacheType type; /* Data, Instruction or Unified */
virBitmapPtr cpus; /* All CPUs that share this bank */
size_t ncontrols;
- virResctrlInfoPtr *controls;
+ virResctrlInfoPerCachePtr *controls;
};

typedef struct _virCapsHost virCapsHost;
diff --git a/src/util/virresctrl.c b/src/util/virresctrl.c
index 2a11825a52dc..050a08178e24 100644
--- a/src/util/virresctrl.c
+++ b/src/util/virresctrl.c
@@ -59,7 +59,7 @@ int
virResctrlGetCacheInfo(unsigned int level,
unsigned long long size,
virCacheType scope,
- virResctrlInfoPtr **controls,
+ virResctrlInfoPerCachePtr **controls,
size_t *ncontrols)
{
int ret = -1;
@@ -69,7 +69,7 @@ virResctrlGetCacheInfo(unsigned int level,
char *type_upper = NULL;
unsigned int bits = 0;
unsigned int min_cbm_bits = 0;
- virResctrlInfoPtr control;
+ virResctrlInfoPerCachePtr control;

if (VIR_ALLOC(control) < 0)
goto cleanup;
diff --git a/src/util/virresctrl.h b/src/util/virresctrl.h
index 848b13e98aa3..42e852780318 100644
--- a/src/util/virresctrl.h
+++ b/src/util/virresctrl.h
@@ -36,9 +36,9 @@ typedef enum {
VIR_ENUM_DECL(virCache);


-typedef struct _virResctrlInfo virResctrlInfo;
-typedef virResctrlInfo *virResctrlInfoPtr;
-struct _virResctrlInfo {
+typedef struct _virResctrlInfoPerCache virResctrlInfoPerCache;
+typedef virResctrlInfoPerCache *virResctrlInfoPerCachePtr;
+struct _virResctrlInfoPerCache {
/* Smallest possible increase of the allocation size in bytes */
unsigned long long granularity;
/* Minimal allocatable size in bytes (if different from granularity) */
@@ -54,7 +54,7 @@ int
virResctrlGetCacheInfo(unsigned int level,
unsigned long long size,
virCacheType scope,
- virResctrlInfoPtr **controls,
+ virResctrlInfoPerCachePtr **controls,
size_t *ncontrols);

int
--
2.15.1
Martin Kletzander
2017-12-13 15:39:25 UTC
Permalink
Signed-off-by: Martin Kletzander <***@redhat.com>
---
src/conf/capabilities.c | 53 ++++++++++++++++++++++++-------------------------
src/conf/capabilities.h | 2 ++
2 files changed, 28 insertions(+), 27 deletions(-)

diff --git a/src/conf/capabilities.c b/src/conf/capabilities.c
index e93eaed2f0e0..edf9f54f7710 100644
--- a/src/conf/capabilities.c
+++ b/src/conf/capabilities.c
@@ -245,6 +245,7 @@ virCapabilitiesDispose(void *object)
VIR_FREE(caps->host.netprefix);
VIR_FREE(caps->host.pagesSize);
virCPUDefFree(caps->host.cpu);
+ virObjectUnref(caps->host.resctrl);
}

/**
@@ -1592,6 +1593,20 @@ virCapsHostCacheBankSorter(const void *a,
}


+static int
+virCapabilitiesInitResctrl(virCapsPtr caps)
+{
+ if (caps->host.resctrl)
+ return 0;
+
+ caps->host.resctrl = virResctrlInfoNew();
+ if (!caps->host.resctrl)
+ return -1;
+
+ return virResctrlGetInfo(caps->host.resctrl);
+}
+
+
int
virCapabilitiesInitCaches(virCapsPtr caps)
{
@@ -1600,7 +1615,6 @@ virCapabilitiesInitCaches(virCapsPtr caps)
ssize_t pos = -1;
DIR *dirp = NULL;
int ret = -1;
- int typeret;
char *path = NULL;
char *type = NULL;
struct dirent *ent = NULL;
@@ -1611,6 +1625,9 @@ virCapabilitiesInitCaches(virCapsPtr caps)
* lose information. */
const int cache_min_level = 3;

+ if (virCapabilitiesInitResctrl(caps) < 0)
+ return -1;
+
/* offline CPUs don't provide cache info */
if (virFileReadValueBitmap(&cpus, "%s/cpu/online", SYSFS_SYSTEM_PATH) < 0)
return -1;
@@ -1676,32 +1693,6 @@ virCapabilitiesInitCaches(virCapsPtr caps)
SYSFS_SYSTEM_PATH, pos, ent->d_name) < 0)
goto cleanup;

- typeret = virResctrlGetCacheControlType(bank->level);
- if (typeret < 0)
- goto cleanup;
-
- if (typeret == 1) {
- if (virResctrlGetCacheInfo(bank->level,
- bank->size,
- VIR_CACHE_TYPE_BOTH,
- &bank->controls,
- &bank->ncontrols) < 0)
- goto cleanup;
- } else if (typeret == 2) {
- if (virResctrlGetCacheInfo(bank->level,
- bank->size,
- VIR_CACHE_TYPE_CODE,
- &bank->controls,
- &bank->ncontrols) < 0)
- goto cleanup;
- if (virResctrlGetCacheInfo(bank->level,
- bank->size,
- VIR_CACHE_TYPE_DATA,
- &bank->controls,
- &bank->ncontrols) < 0)
- goto cleanup;
- }
-
kernel_type = virCacheKernelTypeFromString(type);
if (kernel_type < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR,
@@ -1717,6 +1708,14 @@ virCapabilitiesInitCaches(virCapsPtr caps)
break;
}
if (i == caps->host.ncaches) {
+ /* If it is a new cache, then update its resctrl information. */
+ if (virResctrlInfoGetCache(caps->host.resctrl,
+ bank->level,
+ bank->size,
+ &bank->ncontrols,
+ &bank->controls) < 0)
+ goto cleanup;
+
if (VIR_APPEND_ELEMENT(caps->host.caches,
caps->host.ncaches,
bank) < 0) {
diff --git a/src/conf/capabilities.h b/src/conf/capabilities.h
index 27b88cb5edfa..694a3590bf83 100644
--- a/src/conf/capabilities.h
+++ b/src/conf/capabilities.h
@@ -170,6 +170,8 @@ struct _virCapsHost {
size_t nnumaCell_max;
virCapsHostNUMACellPtr *numaCell;

+ virResctrlInfoPtr resctrl;
+
size_t ncaches;
virCapsHostCacheBankPtr *caches;
--
2.15.1
John Ferlan
2018-01-02 20:39:36 UTC
Permalink
Post by Martin Kletzander
---
src/conf/capabilities.c | 53 ++++++++++++++++++++++++-------------------------
src/conf/capabilities.h | 2 ++
2 files changed, 28 insertions(+), 27 deletions(-)
Reviewed-by: John Ferlan <***@redhat.com>

John
Martin Kletzander
2017-12-13 15:39:26 UTC
Permalink
Signed-off-by: Martin Kletzander <***@redhat.com>
---
src/libvirt_private.syms | 2 -
src/util/virresctrl.c | 132 -----------------------------------------------
src/util/virresctrl.h | 11 ----
3 files changed, 145 deletions(-)

diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 75be612a2f13..1a4f304f5e1f 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -2548,8 +2548,6 @@ virRandomInt;
# util/virresctrl.h
virCacheTypeFromString;
virCacheTypeToString;
-virResctrlGetCacheControlType;
-virResctrlGetCacheInfo;
virResctrlGetInfo;
virResctrlInfoGetCache;
virResctrlInfoNew;
diff --git a/src/util/virresctrl.c b/src/util/virresctrl.c
index 6fd1ceb587cf..a681322905be 100644
--- a/src/util/virresctrl.c
+++ b/src/util/virresctrl.c
@@ -354,135 +354,3 @@ virResctrlInfoGetCache(virResctrlInfoPtr resctrl,
VIR_FREE(*controls);
goto cleanup;
}
-
-
-int
-virResctrlGetCacheInfo(unsigned int level,
- unsigned long long size,
- virCacheType scope,
- virResctrlInfoPerCachePtr **controls,
- size_t *ncontrols)
-{
- int ret = -1;
- char *tmp = NULL;
- char *path = NULL;
- char *cbm_mask = NULL;
- char *type_upper = NULL;
- unsigned int bits = 0;
- unsigned int min_cbm_bits = 0;
- virResctrlInfoPerCachePtr control;
-
- if (VIR_ALLOC(control) < 0)
- goto cleanup;
-
- if (scope != VIR_CACHE_TYPE_BOTH &&
- virStringToUpper(&type_upper, virCacheTypeToString(scope)) < 0)
- goto cleanup;
-
- if (virFileReadValueUint(&control->max_allocation,
- SYSFS_RESCTRL_PATH "/info/L%u%s/num_closids",
- level,
- type_upper ? type_upper : "") < 0)
- goto cleanup;
-
- if (virFileReadValueString(&cbm_mask,
- SYSFS_RESCTRL_PATH
- "/info/L%u%s/cbm_mask",
- level,
- type_upper ? type_upper: "") < 0)
- goto cleanup;
-
- if (virFileReadValueUint(&min_cbm_bits,
- SYSFS_RESCTRL_PATH "/info/L%u%s/min_cbm_bits",
- level,
- type_upper ? type_upper : "") < 0)
- goto cleanup;
-
- virStringTrimOptionalNewline(cbm_mask);
-
- for (tmp = cbm_mask; *tmp != '\0'; tmp++) {
- if (c_isxdigit(*tmp))
- bits += count_one_bits(virHexToBin(*tmp));
- }
-
- control->granularity = size / bits;
- if (min_cbm_bits != 1)
- control->min = min_cbm_bits * control->granularity;
-
- control->scope = scope;
-
- if (VIR_APPEND_ELEMENT(*controls, *ncontrols, control) < 0)
- goto cleanup;
-
- ret = 0;
-
- cleanup:
- VIR_FREE(path);
- VIR_FREE(cbm_mask);
- VIR_FREE(type_upper);
- VIR_FREE(control);
- return ret;
-}
-
-
-static inline int
-virResctrlGetCacheDir(char **path,
- const char *prefix,
- unsigned int level,
- virCacheType type)
-{
- return virAsprintf(path,
- SYSFS_RESCTRL_PATH "%s/L%u%s",
- prefix ? prefix : "",
- level,
- virResctrlTypeToString(type));
-}
-
-
-/*
- * This function tests whether TYPE of cache control is supported or not.
- *
- * Returns 0 if not, 1 if yes and negative value on error.
- */
-static int
-virResctrlGetCacheSupport(unsigned int level, virCacheType type)
-{
- int ret = -1;
- char *path = NULL;
-
- if (virResctrlGetCacheDir(&path, "/info", level, type) < 0)
- return -1;
-
- ret = virFileExists(path);
- VIR_FREE(path);
- return ret;
-}
-
-
-/*
- * This function tests which TYPE of cache control is supported
- * Return values are:
- * -1: error
- * 0: none
- * 1: CAT
- * 2: CDP
- */
-int
-virResctrlGetCacheControlType(unsigned int level)
-{
- int rv = -1;
-
- rv = virResctrlGetCacheSupport(level, VIR_CACHE_TYPE_BOTH);
- if (rv < 0)
- return -1;
- if (rv)
- return 1;
-
- rv = virResctrlGetCacheSupport(level, VIR_CACHE_TYPE_CODE);
- if (rv < 0)
- return -1;
- if (rv)
- return 2;
-
- return 0;
-}
diff --git a/src/util/virresctrl.h b/src/util/virresctrl.h
index 43063903730b..471c02f28dcd 100644
--- a/src/util/virresctrl.h
+++ b/src/util/virresctrl.h
@@ -68,15 +68,4 @@ virResctrlInfoGetCache(virResctrlInfoPtr resctrl,
size_t *ncontrols,
virResctrlInfoPerCachePtr **controls);

-/* To be removed */
-int
-virResctrlGetCacheInfo(unsigned int level,
- unsigned long long size,
- virCacheType scope,
- virResctrlInfoPerCachePtr **controls,
- size_t *ncontrols);
-
-int
-virResctrlGetCacheControlType(unsigned int level);
-
#endif /* __VIR_RESCTRL_H__ */
--
2.15.1
John Ferlan
2018-01-02 20:43:42 UTC
Permalink
Post by Martin Kletzander
---
src/libvirt_private.syms | 2 -
src/util/virresctrl.c | 132 -----------------------------------------------
src/util/virresctrl.h | 11 ----
3 files changed, 145 deletions(-)
Reviewed-by: John Ferlan <***@redhat.com>

John
Martin Kletzander
2017-12-13 15:39:24 UTC
Permalink
This will make the current functions obsolete and it will provide more
information to the virresctrl module so that it can be used later.

Signed-off-by: Martin Kletzander <***@redhat.com>
---
po/POTFILES.in | 1 +
src/libvirt_private.syms | 3 +
src/util/virresctrl.c | 301 +++++++++++++++++++++++++++++++++++++++++++++++
src/util/virresctrl.h | 19 +++
4 files changed, 324 insertions(+)

diff --git a/po/POTFILES.in b/po/POTFILES.in
index c1fa23427eff..8382ee633621 100644
--- a/po/POTFILES.in
+++ b/po/POTFILES.in
@@ -252,6 +252,7 @@ src/util/virportallocator.c
src/util/virprocess.c
src/util/virqemu.c
src/util/virrandom.c
+src/util/virresctrl.c
src/util/virrotatingfile.c
src/util/virscsi.c
src/util/virscsihost.c
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index de4ec4d442c9..75be612a2f13 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -2550,6 +2550,9 @@ virCacheTypeFromString;
virCacheTypeToString;
virResctrlGetCacheControlType;
virResctrlGetCacheInfo;
+virResctrlGetInfo;
+virResctrlInfoGetCache;
+virResctrlInfoNew;


# util/virrotatingfile.h
diff --git a/src/util/virresctrl.c b/src/util/virresctrl.c
index 050a08178e24..6fd1ceb587cf 100644
--- a/src/util/virresctrl.c
+++ b/src/util/virresctrl.c
@@ -25,12 +25,15 @@
#include "viralloc.h"
#include "virfile.h"
#include "virlog.h"
+#include "virobject.h"
#include "virstring.h"

#define VIR_FROM_THIS VIR_FROM_RESCTRL

VIR_LOG_INIT("util.virresctrl")

+
+/* Common definitions */
#define SYSFS_RESCTRL_PATH "/sys/fs/resctrl"

/* Resctrl is short for Resource Control. It might be implemented for various
@@ -55,6 +58,304 @@ VIR_ENUM_IMPL(virResctrl, VIR_CACHE_TYPE_LAST,
"CODE",
"DATA")

+
+/* Info-related definitions and InfoClass-related functions */
+typedef struct _virResctrlInfoPerType virResctrlInfoPerType;
+typedef virResctrlInfoPerType *virResctrlInfoPerTypePtr;
+struct _virResctrlInfoPerType {
+ /* Kernel-provided information */
+ char *cbm_mask;
+ unsigned int min_cbm_bits;
+
+ /* Our computed information from the above */
+ unsigned int bits;
+ unsigned int max_cache_id;
+
+ /* In order to be self-sufficient we need size information per cache.
+ * Funnily enough, one of the outcomes of the resctrlfs design is that it
+ * does not account for different sizes per cache on the same level. So
+ * for the sake of easiness, let's copy that, for now. */
+ unsigned long long size;
+
+ /* Information that we will return upon request (this is public struct) as
+ * until now all the above is internal to this module */
+ virResctrlInfoPerCache control;
+};
+
+typedef struct _virResctrlInfoPerLevel virResctrlInfoPerLevel;
+typedef virResctrlInfoPerLevel *virResctrlInfoPerLevelPtr;
+struct _virResctrlInfoPerLevel {
+ virResctrlInfoPerTypePtr *types;
+};
+
+struct _virResctrlInfo {
+ virObject parent;
+
+ virResctrlInfoPerLevelPtr *levels;
+ size_t nlevels;
+};
+
+static virClassPtr virResctrlInfoClass;
+
+static void
+virResctrlInfoDispose(void *obj)
+{
+ size_t i = 0;
+ size_t j = 0;
+
+ virResctrlInfoPtr resctrl = obj;
+
+ for (i = 0; i < resctrl->nlevels; i++) {
+ virResctrlInfoPerLevelPtr level = resctrl->levels[i];
+
+ if (!level)
+ continue;
+
+ if (level->types) {
+ for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
+ if (level->types[j])
+ VIR_FREE(level->types[j]->cbm_mask);
+ VIR_FREE(level->types[j]);
+ }
+ }
+ VIR_FREE(level->types);
+ VIR_FREE(level);
+ }
+
+ VIR_FREE(resctrl->levels);
+}
+
+
+static int virResctrlInfoOnceInit(void)
+{
+ if (!(virResctrlInfoClass = virClassNew(virClassForObject(),
+ "virResctrlInfo",
+ sizeof(virResctrlInfo),
+ virResctrlInfoDispose)))
+ return -1;
+
+ return 0;
+}
+
+
+VIR_ONCE_GLOBAL_INIT(virResctrlInfo)
+
+
+virResctrlInfoPtr
+virResctrlInfoNew(void)
+{
+ if (virResctrlInfoInitialize() < 0)
+ return NULL;
+
+ return virObjectNew(virResctrlInfoClass);
+}
+
+
+/* Info-related functions */
+bool
+virResctrlInfoIsEmpty(virResctrlInfoPtr resctrl)
+{
+ size_t i = 0;
+ size_t j = 0;
+
+ if (!resctrl)
+ return true;
+
+ for (i = 0; i < resctrl->nlevels; i++) {
+ virResctrlInfoPerLevelPtr i_level = resctrl->levels[i];
+
+ if (!i_level)
+ continue;
+
+ for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
+ if (i_level->types[j])
+ return false;
+ }
+ }
+
+ return true;
+}
+
+int
+virResctrlGetInfo(virResctrlInfoPtr resctrl)
+{
+ DIR *dirp = NULL;
+ char *info_path = NULL;
+ char *endptr = NULL;
+ char *tmp_str = NULL;
+ int ret = -1;
+ int rv = -1;
+ int type = 0;
+ struct dirent *ent = NULL;
+ unsigned int level = 0;
+ virResctrlInfoPerLevelPtr i_level = NULL;
+ virResctrlInfoPerTypePtr i_type = NULL;
+
+ rv = virDirOpenIfExists(&dirp, SYSFS_RESCTRL_PATH "/info");
+ if (rv <= 0) {
+ ret = rv;
+ goto cleanup;
+ }
+
+ while ((rv = virDirRead(dirp, &ent, SYSFS_RESCTRL_PATH "/info")) > 0) {
+ if (ent->d_type != DT_DIR)
+ continue;
+
+ if (ent->d_name[0] != 'L')
+ continue;
+
+ if (virStrToLong_uip(ent->d_name + 1, &endptr, 10, &level) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Cannot parse resctrl cache info level"));
+ goto cleanup;
+ }
+
+ type = virResctrlTypeFromString(endptr);
+ if (type < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Cannot parse resctrl cache info type"));
+ goto cleanup;
+ }
+
+ if (VIR_ALLOC(i_type) < 0)
+ goto cleanup;
+
+ i_type->control.scope = type;
+
+ rv = virFileReadValueUint(&i_type->control.max_allocation,
+ SYSFS_RESCTRL_PATH "/info/%s/num_closids",
+ ent->d_name);
+ if (rv == -2)
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Cannot get num_closids from resctrl cache info"));
+ if (rv < 0)
+ goto cleanup;
+
+ rv = virFileReadValueString(&i_type->cbm_mask,
+ SYSFS_RESCTRL_PATH
+ "/info/%s/cbm_mask",
+ ent->d_name);
+ if (rv == -2)
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Cannot get cbm_mask from resctrl cache info"));
+ if (rv < 0)
+ goto cleanup;
+
+ rv = virFileReadValueUint(&i_type->min_cbm_bits,
+ SYSFS_RESCTRL_PATH "/info/%s/min_cbm_bits",
+ ent->d_name);
+ if (rv == -2)
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Cannot get min_cbm_bits from resctrl cache info"));
+ if (rv < 0)
+ goto cleanup;
+
+ virStringTrimOptionalNewline(i_type->cbm_mask);
+
+ if (resctrl->nlevels <= level &&
+ VIR_EXPAND_N(resctrl->levels, resctrl->nlevels,
+ level - resctrl->nlevels + 1) < 0)
+ goto cleanup;
+
+ if (!resctrl->levels[level] &&
+ (VIR_ALLOC(resctrl->levels[level]) < 0 ||
+ VIR_ALLOC_N(resctrl->levels[level]->types, VIR_CACHE_TYPE_LAST) < 0))
+ goto cleanup;
+ i_level = resctrl->levels[level];
+
+ if (i_level->types[type]) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Duplicate cache type in resctrlfs for level %u"),
+ level);
+ goto cleanup;
+ }
+
+ for (tmp_str = i_type->cbm_mask; *tmp_str != '\0'; tmp_str++) {
+ if (!c_isxdigit(*tmp_str)) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Cannot parse cbm_mask from resctrl cache info"));
+ goto cleanup;
+ }
+
+ i_type->bits += count_one_bits(virHexToBin(*tmp_str));
+ }
+
+ VIR_STEAL_PTR(i_level->types[type], i_type);
+ }
+
+ ret = 0;
+ cleanup:
+ VIR_DIR_CLOSE(dirp);
+ VIR_FREE(info_path);
+ VIR_FREE(i_type);
+ return ret;
+}
+
+
+int
+virResctrlInfoGetCache(virResctrlInfoPtr resctrl,
+ unsigned int level,
+ unsigned long long size,
+ size_t *ncontrols,
+ virResctrlInfoPerCachePtr **controls)
+{
+ virResctrlInfoPerLevelPtr i_level = NULL;
+ virResctrlInfoPerTypePtr i_type = NULL;
+ size_t i = 0;
+ int ret = -1;
+
+ if (virResctrlInfoIsEmpty(resctrl))
+ return 0;
+
+ if (level >= resctrl->nlevels)
+ return 0;
+
+ i_level = resctrl->levels[level];
+ if (!i_level)
+ return 0;
+
+ for (i = 0; i < VIR_CACHE_TYPE_LAST; i++) {
+ i_type = i_level->types[i];
+ if (!i_type)
+ continue;
+
+ /* Let's take the opportunity to update our internal information about
+ * the cache size */
+ if (!i_type->size) {
+ i_type->size = size;
+ i_type->control.granularity = size / i_type->bits;
+ if (i_type->min_cbm_bits != 1)
+ i_type->control.min = i_type->min_cbm_bits * i_type->control.granularity;
+ } else {
+ if (i_type->size != size) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Forbidden inconsistency for resctrlfs, "
+ "level %u caches have different sizes"),
+ level);
+ goto error;
+ }
+ i_type->max_cache_id++;
+ }
+
+ if (VIR_EXPAND_N(*controls, *ncontrols, 1) < 0)
+ goto error;
+ if (VIR_ALLOC((*controls)[*ncontrols - 1]) < 0)
+ goto error;
+
+ memcpy((*controls)[*ncontrols - 1], &i_type->control, sizeof(i_type->control));
+ }
+
+ ret = 0;
+ cleanup:
+ return ret;
+ error:
+ while (*ncontrols)
+ VIR_FREE((*controls)[--*ncontrols]);
+ VIR_FREE(*controls);
+ goto cleanup;
+}
+
+
int
virResctrlGetCacheInfo(unsigned int level,
unsigned long long size,
diff --git a/src/util/virresctrl.h b/src/util/virresctrl.h
index 42e852780318..43063903730b 100644
--- a/src/util/virresctrl.h
+++ b/src/util/virresctrl.h
@@ -49,7 +49,26 @@ struct _virResctrlInfoPerCache {
unsigned int max_allocation;
};

+typedef struct _virResctrlInfo virResctrlInfo;
+typedef virResctrlInfo *virResctrlInfoPtr;

+virResctrlInfoPtr
+virResctrlInfoNew(void);
+
+bool
+virResctrlInfoIsEmpty(virResctrlInfoPtr resctrl);
+
+int
+virResctrlGetInfo(virResctrlInfoPtr resctrl);
+
+int
+virResctrlInfoGetCache(virResctrlInfoPtr resctrl,
+ unsigned int level,
+ unsigned long long size,
+ size_t *ncontrols,
+ virResctrlInfoPerCachePtr **controls);
+
+/* To be removed */
int
virResctrlGetCacheInfo(unsigned int level,
unsigned long long size,
--
2.15.1
John Ferlan
2018-01-02 20:08:30 UTC
Permalink
Post by Martin Kletzander
This will make the current functions obsolete and it will provide more
information to the virresctrl module so that it can be used later.
---
po/POTFILES.in | 1 +
src/libvirt_private.syms | 3 +
src/util/virresctrl.c | 301 +++++++++++++++++++++++++++++++++++++++++++++++
src/util/virresctrl.h | 19 +++
4 files changed, 324 insertions(+)
diff --git a/po/POTFILES.in b/po/POTFILES.in
index c1fa23427eff..8382ee633621 100644
--- a/po/POTFILES.in
+++ b/po/POTFILES.in
@@ -252,6 +252,7 @@ src/util/virportallocator.c
src/util/virprocess.c
src/util/virqemu.c
src/util/virrandom.c
+src/util/virresctrl.c
src/util/virrotatingfile.c
src/util/virscsi.c
src/util/virscsihost.c
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index de4ec4d442c9..75be612a2f13 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -2550,6 +2550,9 @@ virCacheTypeFromString;
virCacheTypeToString;
virResctrlGetCacheControlType;
virResctrlGetCacheInfo;
+virResctrlGetInfo;
+virResctrlInfoGetCache;
+virResctrlInfoNew;
# util/virrotatingfile.h
diff --git a/src/util/virresctrl.c b/src/util/virresctrl.c
index 050a08178e24..6fd1ceb587cf 100644
--- a/src/util/virresctrl.c
+++ b/src/util/virresctrl.c
@@ -25,12 +25,15 @@
#include "viralloc.h"
#include "virfile.h"
#include "virlog.h"
+#include "virobject.h"
#include "virstring.h"
#define VIR_FROM_THIS VIR_FROM_RESCTRL
VIR_LOG_INIT("util.virresctrl")
+
+/* Common definitions */
#define SYSFS_RESCTRL_PATH "/sys/fs/resctrl"
/* Resctrl is short for Resource Control. It might be implemented for various
@@ -55,6 +58,304 @@ VIR_ENUM_IMPL(virResctrl, VIR_CACHE_TYPE_LAST,
"CODE",
"DATA")
+
+/* Info-related definitions and InfoClass-related functions */
+typedef struct _virResctrlInfoPerType virResctrlInfoPerType;
+typedef virResctrlInfoPerType *virResctrlInfoPerTypePtr;
+struct _virResctrlInfoPerType {
+ /* Kernel-provided information */
+ char *cbm_mask;
+ unsigned int min_cbm_bits;
+
+ /* Our computed information from the above */
+ unsigned int bits;
+ unsigned int max_cache_id;
+
+ /* In order to be self-sufficient we need size information per cache.
+ * Funnily enough, one of the outcomes of the resctrlfs design is that it
+ * does not account for different sizes per cache on the same level. So
+ * for the sake of easiness, let's copy that, for now. */
+ unsigned long long size;
+
+ /* Information that we will return upon request (this is public struct) as
+ * until now all the above is internal to this module */
+ virResctrlInfoPerCache control;
+};
+
+typedef struct _virResctrlInfoPerLevel virResctrlInfoPerLevel;
+typedef virResctrlInfoPerLevel *virResctrlInfoPerLevelPtr;
+struct _virResctrlInfoPerLevel {
+ virResctrlInfoPerTypePtr *types;
+};
+
+struct _virResctrlInfo {
+ virObject parent;
+
+ virResctrlInfoPerLevelPtr *levels;
+ size_t nlevels;
+};
+
+static virClassPtr virResctrlInfoClass;
+
+static void
+virResctrlInfoDispose(void *obj)
+{
+ size_t i = 0;
+ size_t j = 0;
+
+ virResctrlInfoPtr resctrl = obj;
+
+ for (i = 0; i < resctrl->nlevels; i++) {
+ virResctrlInfoPerLevelPtr level = resctrl->levels[i];
+
+ if (!level)
+ continue;
+
+ if (level->types) {
+ for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
+ if (level->types[j])
+ VIR_FREE(level->types[j]->cbm_mask);
+ VIR_FREE(level->types[j]);
+ }
+ }
+ VIR_FREE(level->types);
+ VIR_FREE(level);
+ }
+
+ VIR_FREE(resctrl->levels);
+}
+
+
+static int virResctrlInfoOnceInit(void)
static int
virRes....
Post by Martin Kletzander
+{
+ if (!(virResctrlInfoClass = virClassNew(virClassForObject(),
+ "virResctrlInfo",
+ sizeof(virResctrlInfo),
+ virResctrlInfoDispose)))
+ return -1;
+
+ return 0;
+}
+
+
+VIR_ONCE_GLOBAL_INIT(virResctrlInfo)
+
+
+virResctrlInfoPtr
+virResctrlInfoNew(void)
+{
+ if (virResctrlInfoInitialize() < 0)
+ return NULL;
+
+ return virObjectNew(virResctrlInfoClass);
+}
+
+
+/* Info-related functions */
+bool
+virResctrlInfoIsEmpty(virResctrlInfoPtr resctrl)
+{
+ size_t i = 0;
+ size_t j = 0;
+
+ if (!resctrl)
+ return true;
+
+ for (i = 0; i < resctrl->nlevels; i++) {
+ virResctrlInfoPerLevelPtr i_level = resctrl->levels[i];
+
+ if (!i_level)
+ continue;
+
+ for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
+ if (i_level->types[j])
+ return false;
+ }
+ }
+
+ return true;
+}
+
Two blank lines
Post by Martin Kletzander
+int
+virResctrlGetInfo(virResctrlInfoPtr resctrl)
+{
+ DIR *dirp = NULL;
+ char *info_path = NULL;
+ char *endptr = NULL;
+ char *tmp_str = NULL;
+ int ret = -1;
+ int rv = -1;
+ int type = 0;
+ struct dirent *ent = NULL;
+ unsigned int level = 0;
+ virResctrlInfoPerLevelPtr i_level = NULL;
+ virResctrlInfoPerTypePtr i_type = NULL;
+
+ rv = virDirOpenIfExists(&dirp, SYSFS_RESCTRL_PATH "/info");
+ if (rv <= 0) {
+ ret = rv;
+ goto cleanup;
+ }
+
+ while ((rv = virDirRead(dirp, &ent, SYSFS_RESCTRL_PATH "/info")) > 0) {
+ if (ent->d_type != DT_DIR)
+ continue;
+
+ if (ent->d_name[0] != 'L')
+ continue;
+
+ if (virStrToLong_uip(ent->d_name + 1, &endptr, 10, &level) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Cannot parse resctrl cache info level"));
+ goto cleanup;
+ }
+
+ type = virResctrlTypeFromString(endptr);
+ if (type < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Cannot parse resctrl cache info type"));
+ goto cleanup;
+ }
+
+ if (VIR_ALLOC(i_type) < 0)
+ goto cleanup;
+
+ i_type->control.scope = type;
+
+ rv = virFileReadValueUint(&i_type->control.max_allocation,
+ SYSFS_RESCTRL_PATH "/info/%s/num_closids",
+ ent->d_name);
+ if (rv == -2)
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Cannot get num_closids from resctrl cache info"));
+ if (rv < 0)
+ goto cleanup;
+
+ rv = virFileReadValueString(&i_type->cbm_mask,
+ SYSFS_RESCTRL_PATH
+ "/info/%s/cbm_mask",
+ ent->d_name);
+ if (rv == -2)
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Cannot get cbm_mask from resctrl cache info"));
+ if (rv < 0)
+ goto cleanup;
+
+ rv = virFileReadValueUint(&i_type->min_cbm_bits,
+ SYSFS_RESCTRL_PATH "/info/%s/min_cbm_bits",
+ ent->d_name);
+ if (rv == -2)
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Cannot get min_cbm_bits from resctrl cache info"));
+ if (rv < 0)
+ goto cleanup;
+
+ virStringTrimOptionalNewline(i_type->cbm_mask);
Move this up a few lines to after cbm_mask successfully read...
Post by Martin Kletzander
+
+ if (resctrl->nlevels <= level &&
+ VIR_EXPAND_N(resctrl->levels, resctrl->nlevels,
+ level - resctrl->nlevels + 1) < 0)
+ goto cleanup;
+
+ if (!resctrl->levels[level] &&
+ (VIR_ALLOC(resctrl->levels[level]) < 0 ||
+ VIR_ALLOC_N(resctrl->levels[level]->types, VIR_CACHE_TYPE_LAST) < 0))
+ goto cleanup;
+ i_level = resctrl->levels[level];
+
+ if (i_level->types[type]) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Duplicate cache type in resctrlfs for level %u"),
+ level);
+ goto cleanup;
+ }
+
+ for (tmp_str = i_type->cbm_mask; *tmp_str != '\0'; tmp_str++) {
+ if (!c_isxdigit(*tmp_str)) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Cannot parse cbm_mask from resctrl cache info"));
+ goto cleanup;
+ }
+
+ i_type->bits += count_one_bits(virHexToBin(*tmp_str));
+ }
+
+ VIR_STEAL_PTR(i_level->types[type], i_type);
+ }
+
+ ret = 0;
+ VIR_DIR_CLOSE(dirp);
+ VIR_FREE(info_path);
+ VIR_FREE(i_type);
In a cleanup/failure path i_type.cbm_mask is leaked, thus before this

if (i_type)
VIR_FREE(i_type->cbm_mask);

or go with the VIR_STEAL_PTR type logic for a local @cbm_mask to be
stored in i_type->cbm_mask (IDC whichever way you prefer)
Post by Martin Kletzander
+ return ret;
+}
+
+
+int
+virResctrlInfoGetCache(virResctrlInfoPtr resctrl,
+ unsigned int level,
+ unsigned long long size,
+ size_t *ncontrols,
+ virResctrlInfoPerCachePtr **controls)
+{
+ virResctrlInfoPerLevelPtr i_level = NULL;
+ virResctrlInfoPerTypePtr i_type = NULL;
+ size_t i = 0;
+ int ret = -1;
+
+ if (virResctrlInfoIsEmpty(resctrl))
+ return 0;
+
+ if (level >= resctrl->nlevels)
+ return 0;
+
+ i_level = resctrl->levels[level];
+ if (!i_level)
+ return 0;
+
+ for (i = 0; i < VIR_CACHE_TYPE_LAST; i++) {
+ i_type = i_level->types[i];
+ if (!i_type)
+ continue;
+
+ /* Let's take the opportunity to update our internal information about
+ * the cache size */
+ if (!i_type->size) {
Not really a boolean or pointer comparison...
Post by Martin Kletzander
+ i_type->size = size;
+ i_type->control.granularity = size / i_type->bits;
+ if (i_type->min_cbm_bits != 1)
+ i_type->control.min = i_type->min_cbm_bits * i_type->control.granularity;
+ } else {
+ if (i_type->size != size) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Forbidden inconsistency for resctrlfs, "
+ "level %u caches have different sizes"),
level %u cache size %llu not match expected %llu
levek, i_type->size, size
Post by Martin Kletzander
+ level);
+ goto error;
+ }
+ i_type->max_cache_id++;
+ }
+
+ if (VIR_EXPAND_N(*controls, *ncontrols, 1) < 0)
+ goto error;
+ if (VIR_ALLOC((*controls)[*ncontrols - 1]) < 0)
+ goto error;
+
+ memcpy((*controls)[*ncontrols - 1], &i_type->control, sizeof(i_type->control));
+ }
+
+ ret = 0;
+ return ret;
Is this really necessary? The caller on failure will call
virCapsHostCacheBankFree(bank) which does the free's.
Post by Martin Kletzander
+ while (*ncontrols)
+ VIR_FREE((*controls)[--*ncontrols]);
+ VIR_FREE(*controls);
+ goto cleanup;
+}
+
+
int
virResctrlGetCacheInfo(unsigned int level,
unsigned long long size,
diff --git a/src/util/virresctrl.h b/src/util/virresctrl.h
index 42e852780318..43063903730b 100644
--- a/src/util/virresctrl.h
+++ b/src/util/virresctrl.h
@@ -49,7 +49,26 @@ struct _virResctrlInfoPerCache {
unsigned int max_allocation;
};
+typedef struct _virResctrlInfo virResctrlInfo;
+typedef virResctrlInfo *virResctrlInfoPtr;
+virResctrlInfoPtr
+virResctrlInfoNew(void);
+
+bool
+virResctrlInfoIsEmpty(virResctrlInfoPtr resctrl);
+
+int
+virResctrlGetInfo(virResctrlInfoPtr resctrl);
+
+int
+virResctrlInfoGetCache(virResctrlInfoPtr resctrl,
+ unsigned int level,
+ unsigned long long size,
+ size_t *ncontrols,
+ virResctrlInfoPerCachePtr **controls);
+
+/* To be removed */
Superfluous, but IDC

Reviewed-by: John Ferlan <***@redhat.com>

John
Post by Martin Kletzander
int
virResctrlGetCacheInfo(unsigned int level,
unsigned long long size,
Martin Kletzander
2018-01-22 14:23:46 UTC
Permalink
Post by Martin Kletzander
Post by Martin Kletzander
This will make the current functions obsolete and it will provide more
information to the virresctrl module so that it can be used later.
---
po/POTFILES.in | 1 +
src/libvirt_private.syms | 3 +
src/util/virresctrl.c | 301 +++++++++++++++++++++++++++++++++++++++++++++++
src/util/virresctrl.h | 19 +++
4 files changed, 324 insertions(+)
diff --git a/po/POTFILES.in b/po/POTFILES.in
index c1fa23427eff..8382ee633621 100644
--- a/po/POTFILES.in
+++ b/po/POTFILES.in
@@ -252,6 +252,7 @@ src/util/virportallocator.c
src/util/virprocess.c
src/util/virqemu.c
src/util/virrandom.c
+src/util/virresctrl.c
src/util/virrotatingfile.c
src/util/virscsi.c
src/util/virscsihost.c
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index de4ec4d442c9..75be612a2f13 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -2550,6 +2550,9 @@ virCacheTypeFromString;
virCacheTypeToString;
virResctrlGetCacheControlType;
virResctrlGetCacheInfo;
+virResctrlGetInfo;
+virResctrlInfoGetCache;
+virResctrlInfoNew;
# util/virrotatingfile.h
diff --git a/src/util/virresctrl.c b/src/util/virresctrl.c
index 050a08178e24..6fd1ceb587cf 100644
--- a/src/util/virresctrl.c
+++ b/src/util/virresctrl.c
@@ -25,12 +25,15 @@
#include "viralloc.h"
#include "virfile.h"
#include "virlog.h"
+#include "virobject.h"
#include "virstring.h"
#define VIR_FROM_THIS VIR_FROM_RESCTRL
VIR_LOG_INIT("util.virresctrl")
+
+/* Common definitions */
#define SYSFS_RESCTRL_PATH "/sys/fs/resctrl"
/* Resctrl is short for Resource Control. It might be implemented for various
@@ -55,6 +58,304 @@ VIR_ENUM_IMPL(virResctrl, VIR_CACHE_TYPE_LAST,
"CODE",
"DATA")
+
+/* Info-related definitions and InfoClass-related functions */
+typedef struct _virResctrlInfoPerType virResctrlInfoPerType;
+typedef virResctrlInfoPerType *virResctrlInfoPerTypePtr;
+struct _virResctrlInfoPerType {
+ /* Kernel-provided information */
+ char *cbm_mask;
+ unsigned int min_cbm_bits;
+
+ /* Our computed information from the above */
+ unsigned int bits;
+ unsigned int max_cache_id;
+
+ /* In order to be self-sufficient we need size information per cache.
+ * Funnily enough, one of the outcomes of the resctrlfs design is that it
+ * does not account for different sizes per cache on the same level. So
+ * for the sake of easiness, let's copy that, for now. */
+ unsigned long long size;
+
+ /* Information that we will return upon request (this is public struct) as
+ * until now all the above is internal to this module */
+ virResctrlInfoPerCache control;
+};
+
+typedef struct _virResctrlInfoPerLevel virResctrlInfoPerLevel;
+typedef virResctrlInfoPerLevel *virResctrlInfoPerLevelPtr;
+struct _virResctrlInfoPerLevel {
+ virResctrlInfoPerTypePtr *types;
+};
+
+struct _virResctrlInfo {
+ virObject parent;
+
+ virResctrlInfoPerLevelPtr *levels;
+ size_t nlevels;
+};
+
+static virClassPtr virResctrlInfoClass;
+
+static void
+virResctrlInfoDispose(void *obj)
+{
+ size_t i = 0;
+ size_t j = 0;
+
+ virResctrlInfoPtr resctrl = obj;
+
+ for (i = 0; i < resctrl->nlevels; i++) {
+ virResctrlInfoPerLevelPtr level = resctrl->levels[i];
+
+ if (!level)
+ continue;
+
+ if (level->types) {
+ for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
+ if (level->types[j])
+ VIR_FREE(level->types[j]->cbm_mask);
+ VIR_FREE(level->types[j]);
+ }
+ }
+ VIR_FREE(level->types);
+ VIR_FREE(level);
+ }
+
+ VIR_FREE(resctrl->levels);
+}
+
+
+static int virResctrlInfoOnceInit(void)
static int
virRes....
Post by Martin Kletzander
+{
+ if (!(virResctrlInfoClass = virClassNew(virClassForObject(),
+ "virResctrlInfo",
+ sizeof(virResctrlInfo),
+ virResctrlInfoDispose)))
+ return -1;
+
+ return 0;
+}
+
+
+VIR_ONCE_GLOBAL_INIT(virResctrlInfo)
+
+
+virResctrlInfoPtr
+virResctrlInfoNew(void)
+{
+ if (virResctrlInfoInitialize() < 0)
+ return NULL;
+
+ return virObjectNew(virResctrlInfoClass);
+}
+
+
+/* Info-related functions */
+bool
+virResctrlInfoIsEmpty(virResctrlInfoPtr resctrl)
+{
+ size_t i = 0;
+ size_t j = 0;
+
+ if (!resctrl)
+ return true;
+
+ for (i = 0; i < resctrl->nlevels; i++) {
+ virResctrlInfoPerLevelPtr i_level = resctrl->levels[i];
+
+ if (!i_level)
+ continue;
+
+ for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
+ if (i_level->types[j])
+ return false;
+ }
+ }
+
+ return true;
+}
+
Two blank lines
Post by Martin Kletzander
+int
+virResctrlGetInfo(virResctrlInfoPtr resctrl)
+{
+ DIR *dirp = NULL;
+ char *info_path = NULL;
+ char *endptr = NULL;
+ char *tmp_str = NULL;
+ int ret = -1;
+ int rv = -1;
+ int type = 0;
+ struct dirent *ent = NULL;
+ unsigned int level = 0;
+ virResctrlInfoPerLevelPtr i_level = NULL;
+ virResctrlInfoPerTypePtr i_type = NULL;
+
+ rv = virDirOpenIfExists(&dirp, SYSFS_RESCTRL_PATH "/info");
+ if (rv <= 0) {
+ ret = rv;
+ goto cleanup;
+ }
+
+ while ((rv = virDirRead(dirp, &ent, SYSFS_RESCTRL_PATH "/info")) > 0) {
+ if (ent->d_type != DT_DIR)
+ continue;
+
+ if (ent->d_name[0] != 'L')
+ continue;
+
+ if (virStrToLong_uip(ent->d_name + 1, &endptr, 10, &level) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Cannot parse resctrl cache info level"));
+ goto cleanup;
+ }
+
+ type = virResctrlTypeFromString(endptr);
+ if (type < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Cannot parse resctrl cache info type"));
+ goto cleanup;
+ }
+
+ if (VIR_ALLOC(i_type) < 0)
+ goto cleanup;
+
+ i_type->control.scope = type;
+
+ rv = virFileReadValueUint(&i_type->control.max_allocation,
+ SYSFS_RESCTRL_PATH "/info/%s/num_closids",
+ ent->d_name);
+ if (rv == -2)
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Cannot get num_closids from resctrl cache info"));
+ if (rv < 0)
+ goto cleanup;
+
+ rv = virFileReadValueString(&i_type->cbm_mask,
+ SYSFS_RESCTRL_PATH
+ "/info/%s/cbm_mask",
+ ent->d_name);
+ if (rv == -2)
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Cannot get cbm_mask from resctrl cache info"));
+ if (rv < 0)
+ goto cleanup;
+
+ rv = virFileReadValueUint(&i_type->min_cbm_bits,
+ SYSFS_RESCTRL_PATH "/info/%s/min_cbm_bits",
+ ent->d_name);
+ if (rv == -2)
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Cannot get min_cbm_bits from resctrl cache info"));
+ if (rv < 0)
+ goto cleanup;
+
+ virStringTrimOptionalNewline(i_type->cbm_mask);
Move this up a few lines to after cbm_mask successfully read...
Post by Martin Kletzander
+
+ if (resctrl->nlevels <= level &&
+ VIR_EXPAND_N(resctrl->levels, resctrl->nlevels,
+ level - resctrl->nlevels + 1) < 0)
+ goto cleanup;
+
+ if (!resctrl->levels[level] &&
+ (VIR_ALLOC(resctrl->levels[level]) < 0 ||
+ VIR_ALLOC_N(resctrl->levels[level]->types, VIR_CACHE_TYPE_LAST) < 0))
+ goto cleanup;
+ i_level = resctrl->levels[level];
+
+ if (i_level->types[type]) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Duplicate cache type in resctrlfs for level %u"),
+ level);
+ goto cleanup;
+ }
+
+ for (tmp_str = i_type->cbm_mask; *tmp_str != '\0'; tmp_str++) {
+ if (!c_isxdigit(*tmp_str)) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Cannot parse cbm_mask from resctrl cache info"));
+ goto cleanup;
+ }
+
+ i_type->bits += count_one_bits(virHexToBin(*tmp_str));
+ }
+
+ VIR_STEAL_PTR(i_level->types[type], i_type);
+ }
+
+ ret = 0;
+ VIR_DIR_CLOSE(dirp);
+ VIR_FREE(info_path);
+ VIR_FREE(i_type);
In a cleanup/failure path i_type.cbm_mask is leaked, thus before this
if (i_type)
VIR_FREE(i_type->cbm_mask);
stored in i_type->cbm_mask (IDC whichever way you prefer)
Post by Martin Kletzander
+ return ret;
+}
+
+
+int
+virResctrlInfoGetCache(virResctrlInfoPtr resctrl,
+ unsigned int level,
+ unsigned long long size,
+ size_t *ncontrols,
+ virResctrlInfoPerCachePtr **controls)
+{
+ virResctrlInfoPerLevelPtr i_level = NULL;
+ virResctrlInfoPerTypePtr i_type = NULL;
+ size_t i = 0;
+ int ret = -1;
+
+ if (virResctrlInfoIsEmpty(resctrl))
+ return 0;
+
+ if (level >= resctrl->nlevels)
+ return 0;
+
+ i_level = resctrl->levels[level];
+ if (!i_level)
+ return 0;
+
+ for (i = 0; i < VIR_CACHE_TYPE_LAST; i++) {
+ i_type = i_level->types[i];
+ if (!i_type)
+ continue;
+
+ /* Let's take the opportunity to update our internal information about
+ * the cache size */
+ if (!i_type->size) {
Not really a boolean or pointer comparison...
Post by Martin Kletzander
+ i_type->size = size;
+ i_type->control.granularity = size / i_type->bits;
+ if (i_type->min_cbm_bits != 1)
+ i_type->control.min = i_type->min_cbm_bits * i_type->control.granularity;
+ } else {
+ if (i_type->size != size) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Forbidden inconsistency for resctrlfs, "
+ "level %u caches have different sizes"),
level %u cache size %llu not match expected %llu
levek, i_type->size, size
Post by Martin Kletzander
+ level);
+ goto error;
+ }
+ i_type->max_cache_id++;
+ }
+
+ if (VIR_EXPAND_N(*controls, *ncontrols, 1) < 0)
+ goto error;
+ if (VIR_ALLOC((*controls)[*ncontrols - 1]) < 0)
+ goto error;
+
+ memcpy((*controls)[*ncontrols - 1], &i_type->control, sizeof(i_type->control));
+ }
+
+ ret = 0;
+ return ret;
Is this really necessary? The caller on failure will call
virCapsHostCacheBankFree(bank) which does the free's.
It's not, but it's good coding practice not leaving anything (that you were
changing/allocating) in an inconsistent state when exiting a function.

Fixed everything else.
Martin Kletzander
2017-12-13 15:39:28 UTC
Permalink
More info in the documentation, this is basically the XML parsing/formatting
support, schemas, tests and documentation for the new cputune/cachetune element
that will get used by following patches.

Signed-off-by: Martin Kletzander <***@redhat.com>
---
docs/formatdomain.html.in | 54 +++++
docs/schemas/domaincommon.rng | 32 +++
src/conf/domain_conf.c | 251 +++++++++++++++++++++
src/conf/domain_conf.h | 13 ++
tests/genericxml2xmlindata/cachetune-cdp.xml | 36 +++
.../cachetune-colliding-allocs.xml | 30 +++
.../cachetune-colliding-tunes.xml | 32 +++
.../cachetune-colliding-types.xml | 30 +++
tests/genericxml2xmlindata/cachetune-small.xml | 29 +++
tests/genericxml2xmlindata/cachetune.xml | 33 +++
tests/genericxml2xmltest.c | 10 +
11 files changed, 550 insertions(+)
create mode 100644 tests/genericxml2xmlindata/cachetune-cdp.xml
create mode 100644 tests/genericxml2xmlindata/cachetune-colliding-allocs.xml
create mode 100644 tests/genericxml2xmlindata/cachetune-colliding-tunes.xml
create mode 100644 tests/genericxml2xmlindata/cachetune-colliding-types.xml
create mode 100644 tests/genericxml2xmlindata/cachetune-small.xml
create mode 100644 tests/genericxml2xmlindata/cachetune.xml

diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in
index 01db83e60820..623860cc0e95 100644
--- a/docs/formatdomain.html.in
+++ b/docs/formatdomain.html.in
@@ -689,6 +689,10 @@
&lt;iothread_quota&gt;-1&lt;/iothread_quota&gt;
&lt;vcpusched vcpus='0-4,^3' scheduler='fifo' priority='1'/&gt;
&lt;iothreadsched iothreads='2' scheduler='batch'/&gt;
+ &lt;cachetune vcpus='0-3'&gt;
+ &lt;cache id='0' level='3' type='both' size='3' unit='MiB'/&gt;
+ &lt;cache id='1' level='3' type='both' size='3' unit='MiB'/&gt;
+ &lt;/cachetune&gt;
&lt;/cputune&gt;
...
&lt;/domain&gt;
@@ -834,6 +838,56 @@
<span class="since">Since 1.2.13</span>
</dd>

+ <dt><code>cachetune</code><span class="since">Since 3.10.0</span></dt>
+ <dd>
+ Optional <code>cachetune</code> element can control allocations for CPU
+ caches using the resctrlfs on the host. Whether or not is this supported
+ can be gathered from capabilities where some limitations like minimum
+ size and required granularity are reported as well. The required
+ attribute <code>vcpus</code> specifies to which vCPUs this allocation
+ applies. A vCPU can only be member of one <code>cachetune</code> element
+ allocations. Supported subelements are:
+ <dl>
+ <dt><code>cache</code></dt>
+ <dd>
+ This element controls the allocation of CPU cache and has the
+ following attributes:
+ <dl>
+ <dt><code>level</code></dt>
+ <dd>
+ Host cache level from which to allocate.
+ </dd>
+ <dt><code>id</code></dt>
+ <dd>
+ Host cache id from which to allocate.
+ </dd>
+ <dt><code>type</code></dt>
+ <dd>
+ Type of allocation. Can be <code>code</code> for code
+ (instructions), <code>data</code> for data or <code>both</code>
+ for both code and data (unified). Currently the allocation can
+ be done only with the same type as the host supports, meaning
+ you cannot request <code>both</code> for host with CDP
+ (code/data prioritization) enabled.
+ </dd>
+ <dt><code>size</code></dt>
+ <dd>
+ The size of the region to allocate. The value by default is in
+ bytes, but the <code>unit</code> attribute can be used to scale
+ the value.
+ </dd>
+ <dt><code>unit</code> (optional)</dt>
+ <dd>
+ If specified it is the unit such as KiB, MiB, GiB, or TiB
+ (described in the <code>memory</code> element
+ for <a href="#elementsMemoryAllocation">Memory Allocation</a>)
+ in which <code>size</code> is specified, defaults to bytes.
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+
+ </dd>
</dl>


diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng
index f22c932f6c09..252f58f4379c 100644
--- a/docs/schemas/domaincommon.rng
+++ b/docs/schemas/domaincommon.rng
@@ -900,6 +900,38 @@
<ref name="schedparam"/>
</element>
</zeroOrMore>
+ <zeroOrMore>
+ <element name="cachetune">
+ <attribute name="vcpus">
+ <ref name='cpuset'/>
+ </attribute>
+ <oneOrMore>
+ <element name="cache">
+ <attribute name="id">
+ <ref name='unsignedInt'/>
+ </attribute>
+ <attribute name="level">
+ <ref name='unsignedInt'/>
+ </attribute>
+ <attribute name="type">
+ <choice>
+ <value>both</value>
+ <value>code</value>
+ <value>data</value>
+ </choice>
+ </attribute>
+ <attribute name="size">
+ <ref name='unsignedLong'/>
+ </attribute>
+ <optional>
+ <attribute name='unit'>
+ <ref name='unit'/>
+ </attribute>
+ </optional>
+ </element>
+ </oneOrMore>
+ </element>
+ </zeroOrMore>
</interleave>
</element>
</define>
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 66e21c4bdbee..ec8d760c7971 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -2883,6 +2883,17 @@ virDomainLoaderDefFree(virDomainLoaderDefPtr loader)
VIR_FREE(loader);
}

+static void
+virDomainCachetuneDefFree(virDomainCachetuneDefPtr cachetune)
+{
+ if (!cachetune)
+ return;
+
+ virObjectUnref(cachetune->alloc);
+ virBitmapFree(cachetune->vcpus);
+ VIR_FREE(cachetune);
+}
+
void virDomainDefFree(virDomainDefPtr def)
{
size_t i;
@@ -3055,6 +3066,10 @@ void virDomainDefFree(virDomainDefPtr def)
virDomainShmemDefFree(def->shmems[i]);
VIR_FREE(def->shmems);

+ for (i = 0; i < def->ncachetunes; i++)
+ virDomainCachetuneDefFree(def->cachetunes[i]);
+ VIR_FREE(def->cachetunes);
+
VIR_FREE(def->keywrap);

if (def->namespaceData && def->ns.free)
@@ -18202,6 +18217,165 @@ virDomainDefParseBootOptions(virDomainDefPtr def,
}


+static int
+virDomainCachetuneDefParseCache(xmlXPathContextPtr ctxt,
+ xmlNodePtr node,
+ virResctrlAllocPtr alloc)
+{
+ xmlNodePtr oldnode = ctxt->node;
+ unsigned int level;
+ unsigned int cache;
+ int type;
+ unsigned long long size;
+ char *tmp = NULL;
+ int ret = -1;
+
+ ctxt->node = node;
+
+ tmp = virXMLPropString(node, "id");
+ if (!tmp) {
+ virReportError(VIR_ERR_XML_ERROR, "%s",
+ _("Missing cachetune attribute 'id'"));
+ goto cleanup;
+ }
+ if (virStrToLong_uip(tmp, NULL, 10, &cache) < 0) {
+ virReportError(VIR_ERR_XML_ERROR,
+ _("Invalid cachetune attribute 'id' value '%s'"),
+ tmp);
+ goto cleanup;
+ }
+ VIR_FREE(tmp);
+
+ tmp = virXMLPropString(node, "level");
+ if (!tmp) {
+ virReportError(VIR_ERR_XML_ERROR, "%s",
+ _("Missing cachetune attribute 'level'"));
+ goto cleanup;
+ }
+ if (virStrToLong_uip(tmp, NULL, 10, &level) < 0) {
+ virReportError(VIR_ERR_XML_ERROR,
+ _("Invalid cachetune attribute 'level' value '%s'"),
+ tmp);
+ goto cleanup;
+ }
+ VIR_FREE(tmp);
+
+ tmp = virXMLPropString(node, "type");
+ if (!tmp) {
+ virReportError(VIR_ERR_XML_ERROR, "%s",
+ _("Missing cachetune attribute 'type'"));
+ goto cleanup;
+ }
+ type = virCacheTypeFromString(tmp);
+ if (type < 0) {
+ virReportError(VIR_ERR_XML_ERROR,
+ _("Invalid cachetune attribute 'type' value '%s'"),
+ tmp);
+ goto cleanup;
+ }
+ VIR_FREE(tmp);
+
+ if (virDomainParseScaledValue("./@size", "./@unit",
+ ctxt, &size, 1024,
+ ULLONG_MAX, true) < 0)
+ goto cleanup;
+
+ if (virResctrlAllocSetSize(alloc, level, type, cache, size) < 0)
+ goto cleanup;
+
+ ret = 0;
+ cleanup:
+ ctxt->node = oldnode;
+ VIR_FREE(tmp);
+ return ret;
+}
+
+
+static int
+virDomainCachetuneDefParse(virDomainDefPtr def,
+ xmlXPathContextPtr ctxt,
+ xmlNodePtr node)
+{
+ xmlNodePtr oldnode = ctxt->node;
+ xmlNodePtr *nodes = NULL;
+ virBitmapPtr vcpus = NULL;
+ virResctrlAllocPtr alloc = virResctrlAllocNew();
+ virDomainCachetuneDefPtr tmp_cachetune = NULL;
+ char *tmp = NULL;
+ char *vcpus_str = NULL;
+ ssize_t i = 0;
+ int n;
+ int ret = -1;
+
+ ctxt->node = node;
+
+ if (!alloc)
+ goto cleanup;
+
+ if (VIR_ALLOC(tmp_cachetune) < 0)
+ goto cleanup;
+
+ vcpus_str = virXMLPropString(node, "vcpus");
+ if (!vcpus_str) {
+ virReportError(VIR_ERR_XML_ERROR, "%s",
+ _("Missing cachetune attribute 'vcpus'"));
+ goto cleanup;
+ }
+ if (virBitmapParse(vcpus_str, &vcpus, VIR_DOMAIN_CPUMASK_LEN) < 0) {
+ virReportError(VIR_ERR_XML_ERROR,
+ _("Invalid cachetune attribute 'vcpus' value '%s'"),
+ vcpus_str);
+ goto cleanup;
+ }
+
+ if ((n = virXPathNodeSet("./cache", ctxt, &nodes)) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Cannot extract cache nodes under cachetune"));
+ goto cleanup;
+ }
+
+ for (i = 0; i < n; i++) {
+ if (virDomainCachetuneDefParseCache(ctxt, nodes[i], alloc) < 0)
+ goto cleanup;
+ }
+
+ if (virResctrlAllocIsEmpty(alloc)) {
+ ret = 0;
+ goto cleanup;
+ }
+
+ virBitmapShrink(vcpus, def->maxvcpus);
+
+ for (i = 0; i < def->ncachetunes; i++) {
+ if (virBitmapOverlaps(def->cachetunes[i]->vcpus, vcpus)) {
+ virReportError(VIR_ERR_XML_ERROR, "%s",
+ _("Overlapping vcpus in cachetunes"));
+ goto cleanup;
+ }
+ }
+
+ if (virResctrlAllocSetID(alloc, vcpus_str) < 0)
+ goto cleanup;
+
+ VIR_STEAL_PTR(tmp_cachetune->vcpus, vcpus);
+ VIR_STEAL_PTR(tmp_cachetune->alloc, alloc);
+
+ if (VIR_APPEND_ELEMENT(def->cachetunes, def->ncachetunes, tmp_cachetune) < 0)
+ goto cleanup;
+
+ ret = 0;
+ cleanup:
+ ctxt->node = oldnode;
+ virDomainCachetuneDefFree(tmp_cachetune);
+ virObjectUnref(alloc);
+ virBitmapFree(vcpus);
+ VIR_FREE(vcpus_str);
+ VIR_FREE(nodes);
+ VIR_FREE(tmp);
+ return ret;
+}
+
+
static virDomainDefPtr
virDomainDefParseXML(xmlDocPtr xml,
xmlNodePtr root,
@@ -18754,6 +18928,18 @@ virDomainDefParseXML(xmlDocPtr xml,
}
VIR_FREE(nodes);

+ if ((n = virXPathNodeSet("./cputune/cachetune", ctxt, &nodes)) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("cannot extract cachetune nodes"));
+ goto error;
+ }
+
+ for (i = 0; i < n; i++) {
+ if (virDomainCachetuneDefParse(def, ctxt, nodes[i]) < 0)
+ goto error;
+ }
+ VIR_FREE(nodes);
+
if (virCPUDefParseXML(ctxt, "./cpu[1]", VIR_CPU_TYPE_GUEST, &def->cpu) < 0)
goto error;

@@ -25666,6 +25852,68 @@ virDomainSchedulerFormat(virBufferPtr buf,
}


+struct virCachetuneHelperData {
+ virBufferPtr buf;
+ size_t vcpu_id;
+};
+
+static int
+virDomainCachetuneDefFormatHelper(unsigned int level,
+ virCacheType type,
+ unsigned int cache,
+ unsigned long long size,
+ void *opaque)
+{
+ const char *unit;
+ virBufferPtr buf = opaque;
+ unsigned long long short_size = virFormatIntPretty(size, &unit);
+
+ virBufferAsprintf(buf,
+ "<cache id='%u' level='%u' type='%s' "
+ "size='%llu' unit='%s'/>\n",
+ cache, level, virCacheTypeToString(type),
+ short_size, unit);
+
+ return 0;
+}
+
+
+static int
+virDomainCachetuneDefFormat(virBufferPtr buf,
+ virDomainCachetuneDefPtr cachetune)
+{
+ virBuffer childrenBuf = VIR_BUFFER_INITIALIZER;
+ char *vcpus = NULL;
+ int ret = -1;
+
+ virBufferSetChildIndent(&childrenBuf, buf);
+ virResctrlAllocForeachSize(cachetune->alloc,
+ virDomainCachetuneDefFormatHelper,
+ &childrenBuf);
+
+
+ if (virBufferCheckError(&childrenBuf) < 0)
+ goto cleanup;
+
+ if (!virBufferUse(&childrenBuf)) {
+ ret = 0;
+ goto cleanup;
+ }
+
+ vcpus = virBitmapFormat(cachetune->vcpus);
+
+ virBufferAsprintf(buf, "<cachetune vcpus='%s'>\n", vcpus);
+ virBufferAddBuffer(buf, &childrenBuf);
+ virBufferAddLit(buf, "</cachetune>\n");
+
+ ret = 0;
+ cleanup:
+ virBufferFreeAndReset(&childrenBuf);
+ VIR_FREE(vcpus);
+ return ret;
+}
+
+
static int
virDomainCputuneDefFormat(virBufferPtr buf,
virDomainDefPtr def)
@@ -25767,6 +26015,9 @@ virDomainCputuneDefFormat(virBufferPtr buf,
def->iothreadids[i]->iothread_id);
}

+ for (i = 0; i < def->ncachetunes; i++)
+ virDomainCachetuneDefFormat(&childrenBuf, def->cachetunes[i]);
+
if (virBufferCheckError(&childrenBuf) < 0)
return -1;

diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index 59f250ac96ce..f5f6aed82e54 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -56,6 +56,7 @@
# include "virperf.h"
# include "virtypedparam.h"
# include "virsavecookie.h"
+# include "virresctrl.h"

/* forward declarations of all device types, required by
* virDomainDeviceDef
@@ -2194,6 +2195,15 @@ struct _virDomainCputune {
};


+typedef struct _virDomainCachetuneDef virDomainCachetuneDef;
+typedef virDomainCachetuneDef *virDomainCachetuneDefPtr;
+
+struct _virDomainCachetuneDef {
+ virBitmapPtr vcpus;
+ virResctrlAllocPtr alloc;
+};
+
+
typedef struct _virDomainVcpuDef virDomainVcpuDef;
typedef virDomainVcpuDef *virDomainVcpuDefPtr;

@@ -2322,6 +2332,9 @@ struct _virDomainDef {

virDomainCputune cputune;

+ virDomainCachetuneDefPtr *cachetunes;
+ size_t ncachetunes;
+
virDomainNumaPtr numa;
virDomainResourceDefPtr resource;
virDomainIdMapDef idmap;
diff --git a/tests/genericxml2xmlindata/cachetune-cdp.xml b/tests/genericxml2xmlindata/cachetune-cdp.xml
new file mode 100644
index 000000000000..1331aad06e54
--- /dev/null
+++ b/tests/genericxml2xmlindata/cachetune-cdp.xml
@@ -0,0 +1,36 @@
+<domain type='qemu'>
+ <name>QEMUGuest1</name>
+ <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid>
+ <memory unit='KiB'>219136</memory>
+ <currentMemory unit='KiB'>219136</currentMemory>
+ <vcpu placement='static'>2</vcpu>
+ <cputune>
+ <cachetune vcpus='0-1'>
+ <cache id='0' level='3' type='code' size='7680' unit='KiB'/>
+ <cache id='1' level='3' type='data' size='3840' unit='KiB'/>
+ </cachetune>
+ <cachetune vcpus='2'>
+ <cache id='1' level='3' type='code' size='6' unit='MiB'/>
+ </cachetune>
+ <cachetune vcpus='3'>
+ <cache id='1' level='3' type='data' size='6912' unit='KiB'/>
+ </cachetune>
+ </cputune>
+ <os>
+ <type arch='i686' machine='pc'>hvm</type>
+ <boot dev='hd'/>
+ </os>
+ <clock offset='utc'/>
+ <on_poweroff>destroy</on_poweroff>
+ <on_reboot>restart</on_reboot>
+ <on_crash>destroy</on_crash>
+ <devices>
+ <emulator>/usr/bin/qemu-system-i686</emulator>
+ <controller type='usb' index='0'/>
+ <controller type='ide' index='0'/>
+ <controller type='pci' index='0' model='pci-root'/>
+ <input type='mouse' bus='ps2'/>
+ <input type='keyboard' bus='ps2'/>
+ <memballoon model='virtio'/>
+ </devices>
+</domain>
diff --git a/tests/genericxml2xmlindata/cachetune-colliding-allocs.xml b/tests/genericxml2xmlindata/cachetune-colliding-allocs.xml
new file mode 100644
index 000000000000..994f8fcf2a4e
--- /dev/null
+++ b/tests/genericxml2xmlindata/cachetune-colliding-allocs.xml
@@ -0,0 +1,30 @@
+<domain type='qemu'>
+ <name>QEMUGuest1</name>
+ <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid>
+ <memory unit='KiB'>219136</memory>
+ <currentMemory unit='KiB'>219136</currentMemory>
+ <vcpu placement='static'>2</vcpu>
+ <cputune>
+ <cachetune vcpus='0'>
+ <cache id='0' level='3' type='code' size='12' unit='KiB'/>
+ <cache id='0' level='3' type='code' size='18' unit='KiB'/>
+ </cachetune>
+ </cputune>
+ <os>
+ <type arch='i686' machine='pc'>hvm</type>
+ <boot dev='hd'/>
+ </os>
+ <clock offset='utc'/>
+ <on_poweroff>destroy</on_poweroff>
+ <on_reboot>restart</on_reboot>
+ <on_crash>destroy</on_crash>
+ <devices>
+ <emulator>/usr/bin/qemu-system-i686</emulator>
+ <controller type='usb' index='0'/>
+ <controller type='ide' index='0'/>
+ <controller type='pci' index='0' model='pci-root'/>
+ <input type='mouse' bus='ps2'/>
+ <input type='keyboard' bus='ps2'/>
+ <memballoon model='virtio'/>
+ </devices>
+</domain>
diff --git a/tests/genericxml2xmlindata/cachetune-colliding-tunes.xml b/tests/genericxml2xmlindata/cachetune-colliding-tunes.xml
new file mode 100644
index 000000000000..3d9db85b12d4
--- /dev/null
+++ b/tests/genericxml2xmlindata/cachetune-colliding-tunes.xml
@@ -0,0 +1,32 @@
+<domain type='qemu'>
+ <name>QEMUGuest1</name>
+ <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid>
+ <memory unit='KiB'>219136</memory>
+ <currentMemory unit='KiB'>219136</currentMemory>
+ <vcpu placement='static'>2</vcpu>
+ <cputune>
+ <cachetune vcpus='0'>
+ <cache id='0' level='3' type='code' size='12' unit='KiB'/>
+ </cachetune>
+ <cachetune vcpus='0'>
+ <cache id='0' level='3' type='data' size='18' unit='KiB'/>
+ </cachetune>
+ </cputune>
+ <os>
+ <type arch='i686' machine='pc'>hvm</type>
+ <boot dev='hd'/>
+ </os>
+ <clock offset='utc'/>
+ <on_poweroff>destroy</on_poweroff>
+ <on_reboot>restart</on_reboot>
+ <on_crash>destroy</on_crash>
+ <devices>
+ <emulator>/usr/bin/qemu-system-i686</emulator>
+ <controller type='usb' index='0'/>
+ <controller type='ide' index='0'/>
+ <controller type='pci' index='0' model='pci-root'/>
+ <input type='mouse' bus='ps2'/>
+ <input type='keyboard' bus='ps2'/>
+ <memballoon model='virtio'/>
+ </devices>
+</domain>
diff --git a/tests/genericxml2xmlindata/cachetune-colliding-types.xml b/tests/genericxml2xmlindata/cachetune-colliding-types.xml
new file mode 100644
index 000000000000..1a4e62cd17d3
--- /dev/null
+++ b/tests/genericxml2xmlindata/cachetune-colliding-types.xml
@@ -0,0 +1,30 @@
+<domain type='qemu'>
+ <name>QEMUGuest1</name>
+ <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid>
+ <memory unit='KiB'>219136</memory>
+ <currentMemory unit='KiB'>219136</currentMemory>
+ <vcpu placement='static'>2</vcpu>
+ <cputune>
+ <cachetune vcpus='0-1'>
+ <cache id='0' level='3' type='both' size='12' unit='KiB'/>
+ <cache id='0' level='3' type='code' size='6' unit='KiB'/>
+ </cachetune>
+ </cputune>
+ <os>
+ <type arch='i686' machine='pc'>hvm</type>
+ <boot dev='hd'/>
+ </os>
+ <clock offset='utc'/>
+ <on_poweroff>destroy</on_poweroff>
+ <on_reboot>restart</on_reboot>
+ <on_crash>destroy</on_crash>
+ <devices>
+ <emulator>/usr/bin/qemu-system-i686</emulator>
+ <controller type='usb' index='0'/>
+ <controller type='ide' index='0'/>
+ <controller type='pci' index='0' model='pci-root'/>
+ <input type='mouse' bus='ps2'/>
+ <input type='keyboard' bus='ps2'/>
+ <memballoon model='virtio'/>
+ </devices>
+</domain>
diff --git a/tests/genericxml2xmlindata/cachetune-small.xml b/tests/genericxml2xmlindata/cachetune-small.xml
new file mode 100644
index 000000000000..4b25490b3abb
--- /dev/null
+++ b/tests/genericxml2xmlindata/cachetune-small.xml
@@ -0,0 +1,29 @@
+<domain type='qemu'>
+ <name>QEMUGuest1</name>
+ <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid>
+ <memory unit='KiB'>219136</memory>
+ <currentMemory unit='KiB'>219136</currentMemory>
+ <vcpu placement='static'>2</vcpu>
+ <cputune>
+ <cachetune vcpus='0-1'>
+ <cache id='0' level='3' type='both' size='768' unit='KiB'/>
+ </cachetune>
+ </cputune>
+ <os>
+ <type arch='i686' machine='pc'>hvm</type>
+ <boot dev='hd'/>
+ </os>
+ <clock offset='utc'/>
+ <on_poweroff>destroy</on_poweroff>
+ <on_reboot>restart</on_reboot>
+ <on_crash>destroy</on_crash>
+ <devices>
+ <emulator>/usr/bin/qemu-system-i686</emulator>
+ <controller type='usb' index='0'/>
+ <controller type='ide' index='0'/>
+ <controller type='pci' index='0' model='pci-root'/>
+ <input type='mouse' bus='ps2'/>
+ <input type='keyboard' bus='ps2'/>
+ <memballoon model='virtio'/>
+ </devices>
+</domain>
diff --git a/tests/genericxml2xmlindata/cachetune.xml b/tests/genericxml2xmlindata/cachetune.xml
new file mode 100644
index 000000000000..0051410aec32
--- /dev/null
+++ b/tests/genericxml2xmlindata/cachetune.xml
@@ -0,0 +1,33 @@
+<domain type='qemu'>
+ <name>QEMUGuest1</name>
+ <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid>
+ <memory unit='KiB'>219136</memory>
+ <currentMemory unit='KiB'>219136</currentMemory>
+ <vcpu placement='static'>2</vcpu>
+ <cputune>
+ <cachetune vcpus='0-1'>
+ <cache id='0' level='3' type='both' size='3' unit='MiB'/>
+ <cache id='1' level='3' type='both' size='3' unit='MiB'/>
+ </cachetune>
+ <cachetune vcpus='3'>
+ <cache id='0' level='3' type='both' size='3' unit='MiB'/>
+ </cachetune>
+ </cputune>
+ <os>
+ <type arch='i686' machine='pc'>hvm</type>
+ <boot dev='hd'/>
+ </os>
+ <clock offset='utc'/>
+ <on_poweroff>destroy</on_poweroff>
+ <on_reboot>restart</on_reboot>
+ <on_crash>destroy</on_crash>
+ <devices>
+ <emulator>/usr/bin/qemu-system-i686</emulator>
+ <controller type='usb' index='0'/>
+ <controller type='ide' index='0'/>
+ <controller type='pci' index='0' model='pci-root'/>
+ <input type='mouse' bus='ps2'/>
+ <input type='keyboard' bus='ps2'/>
+ <memballoon model='virtio'/>
+ </devices>
+</domain>
diff --git a/tests/genericxml2xmltest.c b/tests/genericxml2xmltest.c
index deaf5bd9b823..496e9260fa86 100644
--- a/tests/genericxml2xmltest.c
+++ b/tests/genericxml2xmltest.c
@@ -130,6 +130,16 @@ mymain(void)
DO_TEST_FULL("chardev-reconnect-invalid-mode", 0, false,
TEST_COMPARE_DOM_XML2XML_RESULT_FAIL_PARSE);

+ DO_TEST("cachetune");
+ DO_TEST("cachetune-small");
+ DO_TEST("cachetune-cdp");
+ DO_TEST_FULL("cachetune-colliding-allocs", false, true,
+ TEST_COMPARE_DOM_XML2XML_RESULT_FAIL_PARSE);
+ DO_TEST_FULL("cachetune-colliding-tunes", false, true,
+ TEST_COMPARE_DOM_XML2XML_RESULT_FAIL_PARSE);
+ DO_TEST_FULL("cachetune-colliding-types", false, true,
+ TEST_COMPARE_DOM_XML2XML_RESULT_FAIL_PARSE);
+
virObjectUnref(caps);
virObjectUnref(xmlopt);
--
2.15.1
John Ferlan
2018-01-03 23:50:22 UTC
Permalink
Post by Martin Kletzander
More info in the documentation, this is basically the XML parsing/formatting
support, schemas, tests and documentation for the new cputune/cachetune element
that will get used by following patches.
---
docs/formatdomain.html.in | 54 +++++
docs/schemas/domaincommon.rng | 32 +++
src/conf/domain_conf.c | 251 +++++++++++++++++++++
src/conf/domain_conf.h | 13 ++
tests/genericxml2xmlindata/cachetune-cdp.xml | 36 +++
.../cachetune-colliding-allocs.xml | 30 +++
.../cachetune-colliding-tunes.xml | 32 +++
.../cachetune-colliding-types.xml | 30 +++
tests/genericxml2xmlindata/cachetune-small.xml | 29 +++
tests/genericxml2xmlindata/cachetune.xml | 33 +++
tests/genericxml2xmltest.c | 10 +
11 files changed, 550 insertions(+)
create mode 100644 tests/genericxml2xmlindata/cachetune-cdp.xml
create mode 100644 tests/genericxml2xmlindata/cachetune-colliding-allocs.xml
create mode 100644 tests/genericxml2xmlindata/cachetune-colliding-tunes.xml
create mode 100644 tests/genericxml2xmlindata/cachetune-colliding-types.xml
create mode 100644 tests/genericxml2xmlindata/cachetune-small.xml
create mode 100644 tests/genericxml2xmlindata/cachetune.xml
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in
index 01db83e60820..623860cc0e95 100644
--- a/docs/formatdomain.html.in
+++ b/docs/formatdomain.html.in
@@ -689,6 +689,10 @@
...
@@ -834,6 +838,56 @@
<span class="since">Since 1.2.13</span>
</dd>
+ <dt><code>cachetune</code><span class="since">Since 3.10.0</span></dt>
At least 4.0.0 if not 4.1.0
Post by Martin Kletzander
+ <dd>
+ Optional <code>cachetune</code> element can control allocations for CPU
+ caches using the resctrlfs on the host. Whether or not is this supported
s/resctrlfs/Resource Control File System/

Although this defaults to /sys/fs/resctrl, I'm not sure if we want to
state the default or not. It *is* the path we will use though and I
don't believe we have a way to alter that default (yet).
Post by Martin Kletzander
+ can be gathered from capabilities where some limitations like minimum
+ size and required granularity are reported as well. The required
+ attribute <code>vcpus</code> specifies to which vCPUs this allocation
+ applies. A vCPU can only be member of one <code>cachetune</code> element
+ <dl>
+ <dt><code>cache</code></dt>
+ <dd>
+ This element controls the allocation of CPU cache and has the
+ <dl>
+ <dt><code>level</code></dt>
+ <dd>
+ Host cache level from which to allocate.
+ </dd>
+ <dt><code>id</code></dt>
+ <dd>
+ Host cache id from which to allocate.
Oh - should have read this first ;-)... Maybe rename @id internally to
hostCacheID... Shall I assume someone setting this up would know how to
determine how to get these values from the host?
Post by Martin Kletzander
+ </dd>
+ <dt><code>type</code></dt>
+ <dd>
+ Type of allocation. Can be <code>code</code> for code
+ (instructions), <code>data</code> for data or <code>both</code>
+ for both code and data (unified). Currently the allocation can
+ be done only with the same type as the host supports, meaning
+ you cannot request <code>both</code> for host with CDP
+ (code/data prioritization) enabled.
+ </dd>
+ <dt><code>size</code></dt>
+ <dd>
+ The size of the region to allocate. The value by default is in
+ bytes, but the <code>unit</code> attribute can be used to scale
+ the value.
+ </dd>
+ <dt><code>unit</code> (optional)</dt>
+ <dd>
+ If specified it is the unit such as KiB, MiB, GiB, or TiB
+ (described in the <code>memory</code> element
+ for <a href="#elementsMemoryAllocation">Memory Allocation</a>)
+ in which <code>size</code> is specified, defaults to bytes.
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+
+ </dd>
</dl>
[...]
Post by Martin Kletzander
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 66e21c4bdbee..ec8d760c7971 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -2883,6 +2883,17 @@ virDomainLoaderDefFree(virDomainLoaderDefPtr loader)
VIR_FREE(loader);
}
+static void
+virDomainCachetuneDefFree(virDomainCachetuneDefPtr cachetune)
+{
+ if (!cachetune)
+ return;
+
+ virObjectUnref(cachetune->alloc);
+ virBitmapFree(cachetune->vcpus);
+ VIR_FREE(cachetune);
+}
+
Two empty lines before and after
Post by Martin Kletzander
void virDomainDefFree(virDomainDefPtr def)
{
size_t i;
[...]
Post by Martin Kletzander
+struct virCachetuneHelperData {
+ virBufferPtr buf;
+ size_t vcpu_id;
+};
+
+static int
+virDomainCachetuneDefFormatHelper(unsigned int level,
+ virCacheType type,
+ unsigned int cache,
+ unsigned long long size,
+ void *opaque)
+{
+ const char *unit;
+ virBufferPtr buf = opaque;
+ unsigned long long short_size = virFormatIntPretty(size, &unit);
+
+ virBufferAsprintf(buf,
+ "<cache id='%u' level='%u' type='%s' "
+ "size='%llu' unit='%s'/>\n",
+ cache, level, virCacheTypeToString(type),
+ short_size, unit);
A nit, but if the default is "B" and we don't require for parse, then do
we want to Format the output? IOW: why print unit='B'
Post by Martin Kletzander
+
+ return 0;
+}
+
+
[...]
Post by Martin Kletzander
diff --git a/tests/genericxml2xmlindata/cachetune-cdp.xml b/tests/genericxml2xmlindata/cachetune-cdp.xml
new file mode 100644
index 000000000000..1331aad06e54
--- /dev/null
+++ b/tests/genericxml2xmlindata/cachetune-cdp.xml
@@ -0,0 +1,36 @@
+<domain type='qemu'>
+ <name>QEMUGuest1</name>
+ <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid>
+ <memory unit='KiB'>219136</memory>
+ <currentMemory unit='KiB'>219136</currentMemory>
+ <vcpu placement='static'>2</vcpu>
+ <cputune>
+ <cachetune vcpus='0-1'>
+ <cache id='0' level='3' type='code' size='7680' unit='KiB'/>
+ <cache id='1' level='3' type='data' size='3840' unit='KiB'/>
+ </cachetune>
+ <cachetune vcpus='2'>
+ <cache id='1' level='3' type='code' size='6' unit='MiB'/>
+ </cachetune>
+ <cachetune vcpus='3'>
+ <cache id='1' level='3' type='data' size='6912' unit='KiB'/>
+ </cachetune>
But there's only 2 vcpu's on this guest?
Post by Martin Kletzander
+ </cputune>
+ <os>
+ <type arch='i686' machine='pc'>hvm</type>
+ <boot dev='hd'/>
+ </os>
+ <clock offset='utc'/>
+ <on_poweroff>destroy</on_poweroff>
+ <on_reboot>restart</on_reboot>
+ <on_crash>destroy</on_crash>
+ <devices>
+ <emulator>/usr/bin/qemu-system-i686</emulator>
+ <controller type='usb' index='0'/>
+ <controller type='ide' index='0'/>
+ <controller type='pci' index='0' model='pci-root'/>
+ <input type='mouse' bus='ps2'/>
+ <input type='keyboard' bus='ps2'/>
+ <memballoon model='virtio'/>
+ </devices>
+</domain>
[...]
Post by Martin Kletzander
diff --git a/tests/genericxml2xmlindata/cachetune.xml b/tests/genericxml2xmlindata/cachetune.xml
new file mode 100644
index 000000000000..0051410aec32
--- /dev/null
+++ b/tests/genericxml2xmlindata/cachetune.xml
@@ -0,0 +1,33 @@
+<domain type='qemu'>
+ <name>QEMUGuest1</name>
+ <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid>
+ <memory unit='KiB'>219136</memory>
+ <currentMemory unit='KiB'>219136</currentMemory>
+ <vcpu placement='static'>2</vcpu>
+ <cputune>
+ <cachetune vcpus='0-1'>
+ <cache id='0' level='3' type='both' size='3' unit='MiB'/>
+ <cache id='1' level='3' type='both' size='3' unit='MiB'/>
+ </cachetune>
+ <cachetune vcpus='3'>
+ <cache id='0' level='3' type='both' size='3' unit='MiB'/>
+ </cachetune>
Another one with vcpusid > nvcpus defined above....

I'll give you my:

Reviewed-by: John Ferlan <***@redhat.com>

Since adjustments are simple going forward. I'm not sure how you should
handle the vcpusid value > nvcpus if that's an issue or not...

John
Post by Martin Kletzander
+ </cputune>
+ <os>
+ <type arch='i686' machine='pc'>hvm</type>
+ <boot dev='hd'/>
+ </os>
+ <clock offset='utc'/>
+ <on_poweroff>destroy</on_poweroff>
+ <on_reboot>restart</on_reboot>
+ <on_crash>destroy</on_crash>
+ <devices>
+ <emulator>/usr/bin/qemu-system-i686</emulator>
+ <controller type='usb' index='0'/>
+ <controller type='ide' index='0'/>
+ <controller type='pci' index='0' model='pci-root'/>
+ <input type='mouse' bus='ps2'/>
+ <input type='keyboard' bus='ps2'/>
+ <memballoon model='virtio'/>
+ </devices>
+</domain>
Martin Kletzander
2018-01-23 10:07:20 UTC
Permalink
Post by John Ferlan
Post by Martin Kletzander
More info in the documentation, this is basically the XML parsing/formatting
support, schemas, tests and documentation for the new cputune/cachetune element
that will get used by following patches.
---
docs/formatdomain.html.in | 54 +++++
docs/schemas/domaincommon.rng | 32 +++
src/conf/domain_conf.c | 251 +++++++++++++++++++++
src/conf/domain_conf.h | 13 ++
tests/genericxml2xmlindata/cachetune-cdp.xml | 36 +++
.../cachetune-colliding-allocs.xml | 30 +++
.../cachetune-colliding-tunes.xml | 32 +++
.../cachetune-colliding-types.xml | 30 +++
tests/genericxml2xmlindata/cachetune-small.xml | 29 +++
tests/genericxml2xmlindata/cachetune.xml | 33 +++
tests/genericxml2xmltest.c | 10 +
11 files changed, 550 insertions(+)
create mode 100644 tests/genericxml2xmlindata/cachetune-cdp.xml
create mode 100644 tests/genericxml2xmlindata/cachetune-colliding-allocs.xml
create mode 100644 tests/genericxml2xmlindata/cachetune-colliding-tunes.xml
create mode 100644 tests/genericxml2xmlindata/cachetune-colliding-types.xml
create mode 100644 tests/genericxml2xmlindata/cachetune-small.xml
create mode 100644 tests/genericxml2xmlindata/cachetune.xml
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in
index 01db83e60820..623860cc0e95 100644
--- a/docs/formatdomain.html.in
+++ b/docs/formatdomain.html.in
@@ -689,6 +689,10 @@
...
@@ -834,6 +838,56 @@
<span class="since">Since 1.2.13</span>
</dd>
+ <dt><code>cachetune</code><span class="since">Since 3.10.0</span></dt>
At least 4.0.0 if not 4.1.0
Post by Martin Kletzander
+ <dd>
+ Optional <code>cachetune</code> element can control allocations for CPU
+ caches using the resctrlfs on the host. Whether or not is this supported
s/resctrlfs/Resource Control File System/
If you search for Resource Control File System you will find lot of unrelevant
stuff. For resctrlfs (or rather resctrl), however, you will get the precise
info you are looking for. It's also called resctrl in the kernel. You mount it
by doing `mount -t resctrl resctrl [-o cdp] /sys/fs/resctrl`, so I think it's
clearer this way (or without the fs at the end).
Post by John Ferlan
Although this defaults to /sys/fs/resctrl, I'm not sure if we want to
state the default or not. It *is* the path we will use though and I
don't believe we have a way to alter that default (yet).
We don't. We don't mention any other sysfs path in the docs. Why would we?
It's defined elsewhere, not in libvirt. And since it's the only place to be
mounted for now (according to the docs) we can have it hardcoded. Just like the
other paths.
Post by John Ferlan
Post by Martin Kletzander
+ can be gathered from capabilities where some limitations like minimum
+ size and required granularity are reported as well. The required
+ attribute <code>vcpus</code> specifies to which vCPUs this allocation
+ applies. A vCPU can only be member of one <code>cachetune</code> element
+ <dl>
+ <dt><code>cache</code></dt>
+ <dd>
+ This element controls the allocation of CPU cache and has the
+ <dl>
+ <dt><code>level</code></dt>
+ <dd>
+ Host cache level from which to allocate.
+ </dd>
+ <dt><code>id</code></dt>
+ <dd>
+ Host cache id from which to allocate.
hostCacheID... Shall I assume someone setting this up would know how to
determine how to get these values from the host?
I don't see the point in that. Especially since it's called `cache` in the code.
Post by John Ferlan
Post by Martin Kletzander
+ </dd>
+ <dt><code>type</code></dt>
+ <dd>
+ Type of allocation. Can be <code>code</code> for code
+ (instructions), <code>data</code> for data or <code>both</code>
+ for both code and data (unified). Currently the allocation can
+ be done only with the same type as the host supports, meaning
+ you cannot request <code>both</code> for host with CDP
+ (code/data prioritization) enabled.
+ </dd>
+ <dt><code>size</code></dt>
+ <dd>
+ The size of the region to allocate. The value by default is in
+ bytes, but the <code>unit</code> attribute can be used to scale
+ the value.
+ </dd>
+ <dt><code>unit</code> (optional)</dt>
+ <dd>
+ If specified it is the unit such as KiB, MiB, GiB, or TiB
+ (described in the <code>memory</code> element
+ for <a href="#elementsMemoryAllocation">Memory Allocation</a>)
+ in which <code>size</code> is specified, defaults to bytes.
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+
+ </dd>
</dl>
[...]
Post by Martin Kletzander
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 66e21c4bdbee..ec8d760c7971 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -2883,6 +2883,17 @@ virDomainLoaderDefFree(virDomainLoaderDefPtr loader)
VIR_FREE(loader);
}
+static void
+virDomainCachetuneDefFree(virDomainCachetuneDefPtr cachetune)
+{
+ if (!cachetune)
+ return;
+
+ virObjectUnref(cachetune->alloc);
+ virBitmapFree(cachetune->vcpus);
+ VIR_FREE(cachetune);
+}
+
Two empty lines before and after
This will look awkward when other functions around have one only, but I've
already given up this fight.
Post by John Ferlan
Post by Martin Kletzander
void virDomainDefFree(virDomainDefPtr def)
{
size_t i;
[...]
Post by Martin Kletzander
+struct virCachetuneHelperData {
+ virBufferPtr buf;
+ size_t vcpu_id;
+};
+
+static int
+virDomainCachetuneDefFormatHelper(unsigned int level,
+ virCacheType type,
+ unsigned int cache,
+ unsigned long long size,
+ void *opaque)
+{
+ const char *unit;
+ virBufferPtr buf = opaque;
+ unsigned long long short_size = virFormatIntPretty(size, &unit);
+
+ virBufferAsprintf(buf,
+ "<cache id='%u' level='%u' type='%s' "
+ "size='%llu' unit='%s'/>\n",
+ cache, level, virCacheTypeToString(type),
+ short_size, unit);
A nit, but if the default is "B" and we don't require for parse, then do
we want to Format the output? IOW: why print unit='B'
1) Because that way we can default to something else in the future. We
want to specify everything possible, otherwise we'll get stuck with
something we can't change.

2) All other code does that, it's consistent.

3) It's more readable for users even without reading the docs.

4) It would be more code with no useful output.
Post by John Ferlan
Post by Martin Kletzander
+
+ return 0;
+}
+
+
[...]
Post by Martin Kletzander
diff --git a/tests/genericxml2xmlindata/cachetune-cdp.xml b/tests/genericxml2xmlindata/cachetune-cdp.xml
new file mode 100644
index 000000000000..1331aad06e54
--- /dev/null
+++ b/tests/genericxml2xmlindata/cachetune-cdp.xml
@@ -0,0 +1,36 @@
+<domain type='qemu'>
+ <name>QEMUGuest1</name>
+ <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid>
+ <memory unit='KiB'>219136</memory>
+ <currentMemory unit='KiB'>219136</currentMemory>
+ <vcpu placement='static'>2</vcpu>
+ <cputune>
+ <cachetune vcpus='0-1'>
+ <cache id='0' level='3' type='code' size='7680' unit='KiB'/>
+ <cache id='1' level='3' type='data' size='3840' unit='KiB'/>
+ </cachetune>
+ <cachetune vcpus='2'>
+ <cache id='1' level='3' type='code' size='6' unit='MiB'/>
+ </cachetune>
+ <cachetune vcpus='3'>
+ <cache id='1' level='3' type='data' size='6912' unit='KiB'/>
+ </cachetune>
But there's only 2 vcpu's on this guest?
Sure, this will be shrunk and forgotten.
Post by John Ferlan
Post by Martin Kletzander
+ </cputune>
+ <os>
+ <type arch='i686' machine='pc'>hvm</type>
+ <boot dev='hd'/>
+ </os>
+ <clock offset='utc'/>
+ <on_poweroff>destroy</on_poweroff>
+ <on_reboot>restart</on_reboot>
+ <on_crash>destroy</on_crash>
+ <devices>
+ <emulator>/usr/bin/qemu-system-i686</emulator>
+ <controller type='usb' index='0'/>
+ <controller type='ide' index='0'/>
+ <controller type='pci' index='0' model='pci-root'/>
+ <input type='mouse' bus='ps2'/>
+ <input type='keyboard' bus='ps2'/>
+ <memballoon model='virtio'/>
+ </devices>
+</domain>
[...]
Post by Martin Kletzander
diff --git a/tests/genericxml2xmlindata/cachetune.xml b/tests/genericxml2xmlindata/cachetune.xml
new file mode 100644
index 000000000000..0051410aec32
--- /dev/null
+++ b/tests/genericxml2xmlindata/cachetune.xml
@@ -0,0 +1,33 @@
+<domain type='qemu'>
+ <name>QEMUGuest1</name>
+ <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid>
+ <memory unit='KiB'>219136</memory>
+ <currentMemory unit='KiB'>219136</currentMemory>
+ <vcpu placement='static'>2</vcpu>
+ <cputune>
+ <cachetune vcpus='0-1'>
+ <cache id='0' level='3' type='both' size='3' unit='MiB'/>
+ <cache id='1' level='3' type='both' size='3' unit='MiB'/>
+ </cachetune>
+ <cachetune vcpus='3'>
+ <cache id='0' level='3' type='both' size='3' unit='MiB'/>
+ </cachetune>
Another one with vcpusid > nvcpus defined above....
Since adjustments are simple going forward. I'm not sure how you should
handle the vcpusid value > nvcpus if that's an issue or not...
I have to send another version anyway. Not only because
virCachetuneHelperData is not used at all, but other things need
adjustments.
Post by John Ferlan
John
Post by Martin Kletzander
+ </cputune>
+ <os>
+ <type arch='i686' machine='pc'>hvm</type>
+ <boot dev='hd'/>
+ </os>
+ <clock offset='utc'/>
+ <on_poweroff>destroy</on_poweroff>
+ <on_reboot>restart</on_reboot>
+ <on_crash>destroy</on_crash>
+ <devices>
+ <emulator>/usr/bin/qemu-system-i686</emulator>
+ <controller type='usb' index='0'/>
+ <controller type='ide' index='0'/>
+ <controller type='pci' index='0' model='pci-root'/>
+ <input type='mouse' bus='ps2'/>
+ <input type='keyboard' bus='ps2'/>
+ <memballoon model='virtio'/>
+ </devices>
+</domain>
John Ferlan
2018-01-23 15:44:03 UTC
Permalink
Post by Martin Kletzander
Post by John Ferlan
Post by Martin Kletzander
More info in the documentation, this is basically the XML
parsing/formatting
support, schemas, tests and documentation for the new
cputune/cachetune element
that will get used by following patches.
---
 docs/formatdomain.html.in                          |  54 +++++
 docs/schemas/domaincommon.rng                      |  32 +++
 src/conf/domain_conf.c                             | 251
+++++++++++++++++++++
 src/conf/domain_conf.h                             |  13 ++
 tests/genericxml2xmlindata/cachetune-cdp.xml       |  36 +++
 .../cachetune-colliding-allocs.xml                 |  30 +++
 .../cachetune-colliding-tunes.xml                  |  32 +++
 .../cachetune-colliding-types.xml                  |  30 +++
 tests/genericxml2xmlindata/cachetune-small.xml     |  29 +++
 tests/genericxml2xmlindata/cachetune.xml           |  33 +++
 tests/genericxml2xmltest.c                         |  10 +
 11 files changed, 550 insertions(+)
 create mode 100644 tests/genericxml2xmlindata/cachetune-cdp.xml
 create mode 100644
tests/genericxml2xmlindata/cachetune-colliding-allocs.xml
 create mode 100644
tests/genericxml2xmlindata/cachetune-colliding-tunes.xml
 create mode 100644
tests/genericxml2xmlindata/cachetune-colliding-types.xml
 create mode 100644 tests/genericxml2xmlindata/cachetune-small.xml
 create mode 100644 tests/genericxml2xmlindata/cachetune.xml
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in
index 01db83e60820..623860cc0e95 100644
--- a/docs/formatdomain.html.in
+++ b/docs/formatdomain.html.in
@@ -689,6 +689,10 @@
   ...
@@ -834,6 +838,56 @@
         <span class="since">Since 1.2.13</span>
       </dd>
+      <dt><code>cachetune</code><span class="since">Since
3.10.0</span></dt>
At least 4.0.0 if not 4.1.0
Post by Martin Kletzander
+      <dd>
+        Optional <code>cachetune</code> element can control
allocations for CPU
+        caches using the resctrlfs on the host. Whether or not is
this supported
s/resctrlfs/Resource Control File System/
If you search for Resource Control File System you will find lot of unrelevant
stuff.  For resctrlfs (or rather resctrl), however, you will get the
precise
info you are looking for.  It's also called resctrl in the kernel.  You
mount it
by doing `mount -t resctrl resctrl [-o cdp] /sys/fs/resctrl`, so I think it's
clearer this way (or without the fs at the end).
OK that's fine not that big a deal.
Post by Martin Kletzander
Post by John Ferlan
Although this defaults to /sys/fs/resctrl, I'm not sure if we want to
state the default or not.  It *is* the path we will use though and I
don't believe we have a way to alter that default (yet).
We don't.  We don't mention any other sysfs path in the docs.  Why would
we?
It's defined elsewhere, not in libvirt.  And since it's the only place
to be
mounted for now (according to the docs) we can have it hardcoded.  Just
like the
other paths.
Post by John Ferlan
Post by Martin Kletzander
+        can be gathered from capabilities where some limitations
like minimum
+        size and required granularity are reported as well.  The
required
+        attribute <code>vcpus</code> specifies to which vCPUs this
allocation
+        applies. A vCPU can only be member of one
<code>cachetune</code> element
+        <dl>
+          <dt><code>cache</code></dt>
+          <dd>
+            This element controls the allocation of CPU cache and
has the
+            <dl>
+              <dt><code>level</code></dt>
+              <dd>
+                Host cache level from which to allocate.
+              </dd>
+              <dt><code>id</code></dt>
+              <dd>
+                Host cache id from which to allocate.
hostCacheID... Shall I assume someone setting this up would know how to
determine how to get these values from the host?
I don't see the point in that.  Especially since it's called `cache` in
the code.
Tough to recall - review done a month ago, but I have a recollection of
confusion while reading the code. Of course you've been working with it
longer so it's second nature. At this point I was perhaps reflecting a
bit too much.
Post by Martin Kletzander
Post by John Ferlan
Post by Martin Kletzander
+              </dd>
+              <dt><code>type</code></dt>
+              <dd>
+                Type of allocation.  Can be <code>code</code> for code
+                (instructions), <code>data</code> for data or
<code>both</code>
+                for both code and data (unified).  Currently the
allocation can
+                be done only with the same type as the host
supports, meaning
+                you cannot request <code>both</code> for host with CDP
+                (code/data prioritization) enabled.
+              </dd>
+              <dt><code>size</code></dt>
+              <dd>
+                The size of the region to allocate. The value by
default is in
+                bytes, but the <code>unit</code> attribute can be
used to scale
+                the value.
+              </dd>
+              <dt><code>unit</code> (optional)</dt>
+              <dd>
+                If specified it is the unit such as KiB, MiB, GiB,
or TiB
+                (described in the <code>memory</code> element
+                for <a href="#elementsMemoryAllocation">Memory
Allocation</a>)
+                in which <code>size</code> is specified, defaults to
bytes.
+              </dd>
+            </dl>
+          </dd>
+        </dl>
+
+      </dd>
     </dl>
[...]
Post by Martin Kletzander
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 66e21c4bdbee..ec8d760c7971 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -2883,6 +2883,17 @@ virDomainLoaderDefFree(virDomainLoaderDefPtr loader)
     VIR_FREE(loader);
 }
+static void
+virDomainCachetuneDefFree(virDomainCachetuneDefPtr cachetune)
+{
+    if (!cachetune)
+        return;
+
+    virObjectUnref(cachetune->alloc);
+    virBitmapFree(cachetune->vcpus);
+    VIR_FREE(cachetune);
+}
+
Two empty lines before and after
This will look awkward when other functions around have one only, but I've
already given up this fight.
I cannot disagree and understand the desire to follow existing style,
but IIRC there's been another agreement at one point in time to go with
the double blank lines between functions. I'm so used to mentioning it
now that it's just habit. I'm sure I don't always follow it 100% in my
patches either. I mention it - if it's done it's done, if it's not,
well I noted it. It's not like there's a syntax-check rule or some
hacking.html requirement.
Post by Martin Kletzander
Post by John Ferlan
Post by Martin Kletzander
 void virDomainDefFree(virDomainDefPtr def)
 {
     size_t i;
[...]
Post by Martin Kletzander
+struct virCachetuneHelperData {
+    virBufferPtr buf;
+    size_t vcpu_id;
+};
+
+static int
+virDomainCachetuneDefFormatHelper(unsigned int level,
+                                  virCacheType type,
+                                  unsigned int cache,
+                                  unsigned long long size,
+                                  void *opaque)
+{
+    const char *unit;
+    virBufferPtr buf = opaque;
+    unsigned long long short_size = virFormatIntPretty(size, &unit);
+
+    virBufferAsprintf(buf,
+                      "<cache id='%u' level='%u' type='%s' "
+                      "size='%llu' unit='%s'/>\n",
+                      cache, level, virCacheTypeToString(type),
+                      short_size, unit);
A nit, but if the default is "B" and we don't require for parse, then do
we want to Format the output?  IOW: why print unit='B'
1) Because that way we can default to something else in the future.  We
  want to specify everything possible, otherwise we'll get stuck with
  something we can't change.
2) All other code does that, it's consistent.
3) It's more readable for users even without reading the docs.
4) It would be more code with no useful output.
AOK... I guess it was more of a "reaction" to seeing 'B' (or 'bytes')
rather than 'KiB' or 'MiB' - as in "yeah so".


John
Post by Martin Kletzander
Post by John Ferlan
Post by Martin Kletzander
+
+    return 0;
+}
+
+
[...]
Post by Martin Kletzander
diff --git a/tests/genericxml2xmlindata/cachetune-cdp.xml
b/tests/genericxml2xmlindata/cachetune-cdp.xml
new file mode 100644
index 000000000000..1331aad06e54
--- /dev/null
+++ b/tests/genericxml2xmlindata/cachetune-cdp.xml
@@ -0,0 +1,36 @@
+<domain type='qemu'>
+  <name>QEMUGuest1</name>
+  <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid>
+  <memory unit='KiB'>219136</memory>
+  <currentMemory unit='KiB'>219136</currentMemory>
+  <vcpu placement='static'>2</vcpu>
+  <cputune>
+    <cachetune vcpus='0-1'>
+      <cache id='0' level='3' type='code' size='7680' unit='KiB'/>
+      <cache id='1' level='3' type='data' size='3840' unit='KiB'/>
+    </cachetune>
+    <cachetune vcpus='2'>
+      <cache id='1' level='3' type='code' size='6' unit='MiB'/>
+    </cachetune>
+    <cachetune vcpus='3'>
+      <cache id='1' level='3' type='data' size='6912' unit='KiB'/>
+    </cachetune>
But there's only 2 vcpu's on this guest?
Sure, this will be shrunk and forgotten.
Post by John Ferlan
Post by Martin Kletzander
+  </cputune>
+  <os>
+    <type arch='i686' machine='pc'>hvm</type>
+    <boot dev='hd'/>
+  </os>
+  <clock offset='utc'/>
+  <on_poweroff>destroy</on_poweroff>
+  <on_reboot>restart</on_reboot>
+  <on_crash>destroy</on_crash>
+  <devices>
+    <emulator>/usr/bin/qemu-system-i686</emulator>
+    <controller type='usb' index='0'/>
+    <controller type='ide' index='0'/>
+    <controller type='pci' index='0' model='pci-root'/>
+    <input type='mouse' bus='ps2'/>
+    <input type='keyboard' bus='ps2'/>
+    <memballoon model='virtio'/>
+  </devices>
+</domain>
[...]
Post by Martin Kletzander
diff --git a/tests/genericxml2xmlindata/cachetune.xml
b/tests/genericxml2xmlindata/cachetune.xml
new file mode 100644
index 000000000000..0051410aec32
--- /dev/null
+++ b/tests/genericxml2xmlindata/cachetune.xml
@@ -0,0 +1,33 @@
+<domain type='qemu'>
+  <name>QEMUGuest1</name>
+  <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid>
+  <memory unit='KiB'>219136</memory>
+  <currentMemory unit='KiB'>219136</currentMemory>
+  <vcpu placement='static'>2</vcpu>
+  <cputune>
+    <cachetune vcpus='0-1'>
+      <cache id='0' level='3' type='both' size='3' unit='MiB'/>
+      <cache id='1' level='3' type='both' size='3' unit='MiB'/>
+    </cachetune>
+    <cachetune vcpus='3'>
+      <cache id='0' level='3' type='both' size='3' unit='MiB'/>
+    </cachetune>
Another one with vcpusid > nvcpus defined above....
Since adjustments are simple going forward. I'm not sure how you should
handle the vcpusid value > nvcpus if that's an issue or not...
I have to send another version anyway.  Not only because
virCachetuneHelperData is not used at all, but other things need
adjustments.
Post by John Ferlan
John
Post by Martin Kletzander
+  </cputune>
+  <os>
+    <type arch='i686' machine='pc'>hvm</type>
+    <boot dev='hd'/>
+  </os>
+  <clock offset='utc'/>
+  <on_poweroff>destroy</on_poweroff>
+  <on_reboot>restart</on_reboot>
+  <on_crash>destroy</on_crash>
+  <devices>
+    <emulator>/usr/bin/qemu-system-i686</emulator>
+    <controller type='usb' index='0'/>
+    <controller type='ide' index='0'/>
+    <controller type='pci' index='0' model='pci-root'/>
+    <input type='mouse' bus='ps2'/>
+    <input type='keyboard' bus='ps2'/>
+    <memballoon model='virtio'/>
+  </devices>
+</domain>
Martin Kletzander
2017-12-13 15:39:29 UTC
Permalink
This test initializes capabilities from vircaps2xmldata (since it exists there
already) and then requests list of free bitmaps (all unallocated space) from
virresctrl.c

Desirable outputs are saved in virresctrldata.

Signed-off-by: Martin Kletzander <***@redhat.com>
---
tests/Makefile.am | 8 +-
tests/virresctrldata/resctrl-cdp.schemata | 2 +
.../virresctrldata/resctrl-skx-twocaches.schemata | 1 +
tests/virresctrldata/resctrl-skx.schemata | 1 +
tests/virresctrldata/resctrl.schemata | 1 +
tests/virresctrltest.c | 102 +++++++++++++++++++++
6 files changed, 114 insertions(+), 1 deletion(-)
create mode 100644 tests/virresctrldata/resctrl-cdp.schemata
create mode 100644 tests/virresctrldata/resctrl-skx-twocaches.schemata
create mode 100644 tests/virresctrldata/resctrl-skx.schemata
create mode 100644 tests/virresctrldata/resctrl.schemata
create mode 100644 tests/virresctrltest.c

diff --git a/tests/Makefile.am b/tests/Makefile.am
index 3441dab6f6bb..d045073726f0 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -232,6 +232,7 @@ if WITH_LINUX
test_programs += fchosttest
test_programs += scsihosttest
test_programs += vircaps2xmltest
+test_programs += virresctrltest
test_libraries += virusbmock.la \
virnetdevbandwidthmock.la \
virnumamock.la \
@@ -1165,8 +1166,13 @@ virnumamock_la_SOURCES = \
virnumamock_la_LDFLAGS = $(MOCKLIBS_LDFLAGS)
virnumamock_la_LIBADD = $(MOCKLIBS_LIBS)

+virresctrltest_SOURCES = \
+ virresctrltest.c testutils.h testutils.c virfilewrapper.h virfilewrapper.c
+virresctrltest_LDADD = $(LDADDS)
+
else ! WITH_LINUX
-EXTRA_DIST += vircaps2xmltest.c virnumamock.c virfilewrapper.c virfilewrapper.h
+EXTRA_DIST += vircaps2xmltest.c virnumamock.c virfilewrapper.c \
+ virfilewrapper.h virresctrltest.c
endif ! WITH_LINUX

if WITH_NSS
diff --git a/tests/virresctrldata/resctrl-cdp.schemata b/tests/virresctrldata/resctrl-cdp.schemata
new file mode 100644
index 000000000000..2897e2afa603
--- /dev/null
+++ b/tests/virresctrldata/resctrl-cdp.schemata
@@ -0,0 +1,2 @@
+L3CODE:0=00ffc;1=0ff00
+L3DATA:0=00000;1=03fff
diff --git a/tests/virresctrldata/resctrl-skx-twocaches.schemata b/tests/virresctrldata/resctrl-skx-twocaches.schemata
new file mode 100644
index 000000000000..86b3801a04c2
--- /dev/null
+++ b/tests/virresctrldata/resctrl-skx-twocaches.schemata
@@ -0,0 +1 @@
+L3:0=001;1=400
diff --git a/tests/virresctrldata/resctrl-skx.schemata b/tests/virresctrldata/resctrl-skx.schemata
new file mode 100644
index 000000000000..5e8b0d636277
--- /dev/null
+++ b/tests/virresctrldata/resctrl-skx.schemata
@@ -0,0 +1 @@
+L3:0=70f
diff --git a/tests/virresctrldata/resctrl.schemata b/tests/virresctrldata/resctrl.schemata
new file mode 100644
index 000000000000..fa980e58c9dd
--- /dev/null
+++ b/tests/virresctrldata/resctrl.schemata
@@ -0,0 +1 @@
+L3:0=000ff;1=000f0
diff --git a/tests/virresctrltest.c b/tests/virresctrltest.c
new file mode 100644
index 000000000000..80d0e82e7cbf
--- /dev/null
+++ b/tests/virresctrltest.c
@@ -0,0 +1,102 @@
+#include <config.h>
+#include <stdlib.h>
+
+#include "testutils.h"
+#include "virfilewrapper.h"
+#include "virresctrlpriv.h"
+
+
+#define VIR_FROM_THIS VIR_FROM_NONE
+
+struct virResctrlData {
+ const char *filename;
+ bool fail;
+};
+
+
+static int
+test_virResctrlGetFree(const void *opaque)
+{
+ struct virResctrlData *data = (struct virResctrlData *) opaque;
+ char *system_dir = NULL;
+ char *resctrl_dir = NULL;
+ int ret = -1;
+ virResctrlAllocPtr alloc = NULL;
+ char *schemata_str = NULL;
+ char *schemata_file;
+ virCapsPtr caps = NULL;
+
+ if (virAsprintf(&system_dir, "%s/vircaps2xmldata/linux-%s/system",
+ abs_srcdir, data->filename) < 0)
+ goto cleanup;
+
+ if (virAsprintf(&resctrl_dir, "%s/vircaps2xmldata/linux-%s/resctrl",
+ abs_srcdir, data->filename) < 0)
+ goto cleanup;
+
+ if (virAsprintf(&schemata_file, "%s/virresctrldata/%s.schemata",
+ abs_srcdir, data->filename) < 0)
+ goto cleanup;
+
+ virFileWrapperAddPrefix("/sys/devices/system", system_dir);
+ virFileWrapperAddPrefix("/sys/fs/resctrl", resctrl_dir);
+
+ caps = virCapabilitiesNew(VIR_ARCH_X86_64, false, false);
+ if (!caps || virCapabilitiesInitCaches(caps) < 0) {
+ fprintf(stderr, "Could not initialize capabilities");
+ goto cleanup;
+ }
+
+ alloc = virResctrlAllocGetFree(caps->host.resctrl);
+
+ virFileWrapperClearPrefixes();
+
+ if (!alloc) {
+ if (data->fail)
+ ret = 0;
+ goto cleanup;
+ } else if (data->fail) {
+ VIR_TEST_DEBUG("Error expected but there wasn't any.\n");
+ ret = -1;
+ goto cleanup;
+ }
+
+ schemata_str = virResctrlAllocFormat(alloc);
+
+ if (virTestCompareToFile(schemata_str, schemata_file) < 0)
+ goto cleanup;
+
+ ret = 0;
+ cleanup:
+ virObjectUnref(caps);
+ virObjectUnref(alloc);
+ VIR_FREE(system_dir);
+ VIR_FREE(resctrl_dir);
+ VIR_FREE(schemata_str);
+ VIR_FREE(schemata_file);
+ return ret;
+}
+
+
+static int
+mymain(void)
+{
+ struct virResctrlData data = {0};
+ int ret = 0;
+
+#define DO_TEST_FREE(_filename) \
+ do { \
+ data = (struct virResctrlData) { .filename = _filename }; \
+ if (virTestRun("Free: " _filename, test_virResctrlGetFree, &data) < 0) \
+ ret = -1; \
+ } while (0)
+
+ DO_TEST_FREE("resctrl");
+ DO_TEST_FREE("resctrl-cdp");
+ DO_TEST_FREE("resctrl-skx");
+ DO_TEST_FREE("resctrl-skx-twocaches");
+
+ return ret;
+}
+
+VIR_TEST_MAIN(mymain)
--
2.15.1
John Ferlan
2018-01-04 00:09:38 UTC
Permalink
Post by Martin Kletzander
This test initializes capabilities from vircaps2xmldata (since it exists there
already) and then requests list of free bitmaps (all unallocated space) from
virresctrl.c
Desirable outputs are saved in virresctrldata.
---
tests/Makefile.am | 8 +-
tests/virresctrldata/resctrl-cdp.schemata | 2 +
.../virresctrldata/resctrl-skx-twocaches.schemata | 1 +
tests/virresctrldata/resctrl-skx.schemata | 1 +
tests/virresctrldata/resctrl.schemata | 1 +
tests/virresctrltest.c | 102 +++++++++++++++++++++
6 files changed, 114 insertions(+), 1 deletion(-)
create mode 100644 tests/virresctrldata/resctrl-cdp.schemata
create mode 100644 tests/virresctrldata/resctrl-skx-twocaches.schemata
create mode 100644 tests/virresctrldata/resctrl-skx.schemata
create mode 100644 tests/virresctrldata/resctrl.schemata
create mode 100644 tests/virresctrltest.c
Looks reasonable...

Reviewed-by: John Ferlan <***@redhat.com>

John
Martin Kletzander
2017-12-13 15:39:30 UTC
Permalink
Signed-off-by: Martin Kletzander <***@redhat.com>
---
src/qemu/qemu_process.c | 61 +++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 56 insertions(+), 5 deletions(-)

diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index a0f430f89f06..8abd9a5a8da3 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -2507,6 +2507,33 @@ qemuProcessSetupEmulator(virDomainObjPtr vm)
}


+static int
+qemuProcessResctrlCreate(virQEMUDriverPtr driver,
+ virDomainObjPtr vm)
+{
+ int ret = -1;
+ size_t i = 0;
+ virCapsPtr caps = virQEMUDriverGetCapabilities(driver, false);
+ qemuDomainObjPrivatePtr priv = vm->privateData;
+
+ if (!caps)
+ return -1;
+
+ for (i = 0; i < vm->def->ncachetunes; i++) {
+ if (virResctrlAllocCreate(caps->host.resctrl,
+ vm->def->cachetunes[i]->alloc,
+ "qemu",
+ priv->machineName) < 0)
+ goto cleanup;
+ }
+
+ ret = 0;
+ cleanup:
+ virObjectUnref(caps);
+ return ret;
+}
+
+
static int
qemuProcessInitPasswords(virConnectPtr conn,
virQEMUDriverPtr driver,
@@ -5013,12 +5040,26 @@ qemuProcessSetupVcpu(virDomainObjPtr vm,
{
pid_t vcpupid = qemuDomainGetVcpuPid(vm, vcpuid);
virDomainVcpuDefPtr vcpu = virDomainDefGetVcpu(vm->def, vcpuid);
+ size_t i = 0;

- return qemuProcessSetupPid(vm, vcpupid, VIR_CGROUP_THREAD_VCPU,
- vcpuid, vcpu->cpumask,
- vm->def->cputune.period,
- vm->def->cputune.quota,
- &vcpu->sched);
+ if (qemuProcessSetupPid(vm, vcpupid, VIR_CGROUP_THREAD_VCPU,
+ vcpuid, vcpu->cpumask,
+ vm->def->cputune.period,
+ vm->def->cputune.quota,
+ &vcpu->sched) < 0)
+ return -1;
+
+ for (i = 0; i < vm->def->ncachetunes; i++) {
+ virDomainCachetuneDefPtr ct = vm->def->cachetunes[i];
+
+ if (virBitmapIsBitSet(ct->vcpus, vcpuid)) {
+ if (virResctrlAllocAddPID(ct->alloc, vcpupid) < 0)
+ return -1;
+ break;
+ }
+ }
+
+ return 0;
}


@@ -5891,6 +5932,10 @@ qemuProcessLaunch(virConnectPtr conn,
if (qemuProcessSetupEmulator(vm) < 0)
goto cleanup;

+ VIR_DEBUG("Setting up resctrlfs");
+ if (qemuProcessResctrlCreate(driver, vm) < 0)
+ goto cleanup;
+
VIR_DEBUG("Setting domain security labels");
if (qemuSecuritySetAllLabel(driver,
vm,
@@ -6539,6 +6584,12 @@ void qemuProcessStop(virQEMUDriverPtr driver,
vm->def->name);
}

+ /* Remove resctrl allocation after cgroups are cleaned up which makes it
+ * kind of safer (although removing the allocation should work even with
+ * pids in tasks file */
+ for (i = 0; i < vm->def->ncachetunes; i++)
+ virResctrlAllocRemove(vm->def->cachetunes[i]->alloc);
+
qemuProcessRemoveDomainStatus(driver, vm);

/* Remove VNC and Spice ports from port reservation bitmap, but only if
--
2.15.1
John Ferlan
2018-01-04 00:19:00 UTC
Permalink
Need to beef up this commit message.
Post by Martin Kletzander
---
src/qemu/qemu_process.c | 61 +++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 56 insertions(+), 5 deletions(-)
Is there anything special that needs to be done at libvirtd restart time?

If hot{plug|unplug} isn't supported if a vcpu has a vcputune defined,
then we probably need to inhibit that. Otherwise, what needs to be done
to support it?

John
Post by Martin Kletzander
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index a0f430f89f06..8abd9a5a8da3 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -2507,6 +2507,33 @@ qemuProcessSetupEmulator(virDomainObjPtr vm)
}
+static int
+qemuProcessResctrlCreate(virQEMUDriverPtr driver,
+ virDomainObjPtr vm)
+{
+ int ret = -1;
+ size_t i = 0;
+ virCapsPtr caps = virQEMUDriverGetCapabilities(driver, false);
+ qemuDomainObjPrivatePtr priv = vm->privateData;
+
+ if (!caps)
+ return -1;
+
+ for (i = 0; i < vm->def->ncachetunes; i++) {
+ if (virResctrlAllocCreate(caps->host.resctrl,
+ vm->def->cachetunes[i]->alloc,
+ "qemu",
+ priv->machineName) < 0)
+ goto cleanup;
+ }
+
+ ret = 0;
+ virObjectUnref(caps);
+ return ret;
+}
+
+
static int
qemuProcessInitPasswords(virConnectPtr conn,
virQEMUDriverPtr driver,
@@ -5013,12 +5040,26 @@ qemuProcessSetupVcpu(virDomainObjPtr vm,
{
pid_t vcpupid = qemuDomainGetVcpuPid(vm, vcpuid);
virDomainVcpuDefPtr vcpu = virDomainDefGetVcpu(vm->def, vcpuid);
+ size_t i = 0;
- return qemuProcessSetupPid(vm, vcpupid, VIR_CGROUP_THREAD_VCPU,
- vcpuid, vcpu->cpumask,
- vm->def->cputune.period,
- vm->def->cputune.quota,
- &vcpu->sched);
+ if (qemuProcessSetupPid(vm, vcpupid, VIR_CGROUP_THREAD_VCPU,
+ vcpuid, vcpu->cpumask,
+ vm->def->cputune.period,
+ vm->def->cputune.quota,
+ &vcpu->sched) < 0)
+ return -1;
+
+ for (i = 0; i < vm->def->ncachetunes; i++) {
+ virDomainCachetuneDefPtr ct = vm->def->cachetunes[i];
+
+ if (virBitmapIsBitSet(ct->vcpus, vcpuid)) {
+ if (virResctrlAllocAddPID(ct->alloc, vcpupid) < 0)
+ return -1;
+ break;
+ }
+ }
+
+ return 0;
}
@@ -5891,6 +5932,10 @@ qemuProcessLaunch(virConnectPtr conn,
if (qemuProcessSetupEmulator(vm) < 0)
goto cleanup;
+ VIR_DEBUG("Setting up resctrlfs");
+ if (qemuProcessResctrlCreate(driver, vm) < 0)
+ goto cleanup;
+
VIR_DEBUG("Setting domain security labels");
if (qemuSecuritySetAllLabel(driver,
vm,
@@ -6539,6 +6584,12 @@ void qemuProcessStop(virQEMUDriverPtr driver,
vm->def->name);
}
+ /* Remove resctrl allocation after cgroups are cleaned up which makes it
+ * kind of safer (although removing the allocation should work even with
+ * pids in tasks file */
+ for (i = 0; i < vm->def->ncachetunes; i++)
+ virResctrlAllocRemove(vm->def->cachetunes[i]->alloc);
+
qemuProcessRemoveDomainStatus(driver, vm);
/* Remove VNC and Spice ports from port reservation bitmap, but only if
Martin Kletzander
2018-01-23 11:41:56 UTC
Permalink
Post by John Ferlan
Need to beef up this commit message.
Post by Martin Kletzander
---
src/qemu/qemu_process.c | 61 +++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 56 insertions(+), 5 deletions(-)
Is there anything special that needs to be done at libvirtd restart time?
There is. One more attribute needs to be parsed from the config that will only
be available in status XML.
Post by John Ferlan
If hot{plug|unplug} isn't supported if a vcpu has a vcputune defined,
then we probably need to inhibit that. Otherwise, what needs to be done
to support it?
It is. It's just that our code is written so nicely that I didn't need to touch
qemu_hotplug.c at all. For hotunplug there's not much we can do and for hotplug
the function qemuProcessSetupVcpu() is called.
Martin Kletzander
2017-12-13 15:39:31 UTC
Permalink
Signed-off-by: Martin Kletzander <***@redhat.com>
---
docs/news.xml | 9 +++++++++
1 file changed, 9 insertions(+)

diff --git a/docs/news.xml b/docs/news.xml
index 0ec9857e2cab..ac3298de01a8 100644
--- a/docs/news.xml
+++ b/docs/news.xml
@@ -35,6 +35,15 @@
<libvirt>
<release version="v4.0.0" date="unreleased">
<section title="New features">
+ <change>
+ <summary>
+ Added support for CAT (Cache allocation Technology)
+ </summary>
+ <description>
+ Domain vCPU threads can now have allocated some parts of host cache
+ using the <code>cachetune</code> element in <code>cputune</code>.
+ </description>
+ </change>
</section>
<section title="Improvements">
</section>
--
2.15.1
John Ferlan
2018-01-04 00:20:50 UTC
Permalink
Post by Martin Kletzander
---
docs/news.xml | 9 +++++++++
1 file changed, 9 insertions(+)
Just for completeness... Obviously this will need final tweaks... So far
so good though...

John
Post by Martin Kletzander
diff --git a/docs/news.xml b/docs/news.xml
index 0ec9857e2cab..ac3298de01a8 100644
--- a/docs/news.xml
+++ b/docs/news.xml
@@ -35,6 +35,15 @@
<libvirt>
<release version="v4.0.0" date="unreleased">
<section title="New features">
+ <change>
+ <summary>
+ Added support for CAT (Cache allocation Technology)
+ </summary>
+ <description>
+ Domain vCPU threads can now have allocated some parts of host cache
+ using the <code>cachetune</code> element in <code>cputune</code>.
+ </description>
+ </change>
</section>
<section title="Improvements">
</section>
Martin Kletzander
2017-12-13 15:39:27 UTC
Permalink
With this commit we finally have a way to read and manipulate basic resctrl
settings. Locking is done only on exposed functions that read/write from/to
resctrlfs. Not in functions that are exposed in virresctrlpriv.h as those are
only supposed to be used from tests.

Signed-off-by: Martin Kletzander <***@redhat.com>
---
src/Makefile.am | 2 +-
src/libvirt_private.syms | 11 +
src/util/virresctrl.c | 1041 ++++++++++++++++++++++++++++++++++++++++++++-
src/util/virresctrl.h | 62 +++
src/util/virresctrlpriv.h | 27 ++
5 files changed, 1141 insertions(+), 2 deletions(-)
create mode 100644 src/util/virresctrlpriv.h

diff --git a/src/Makefile.am b/src/Makefile.am
index 4c022d1e44b9..9b4fd0c1d597 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -167,7 +167,7 @@ UTIL_SOURCES = \
util/virprocess.c util/virprocess.h \
util/virqemu.c util/virqemu.h \
util/virrandom.h util/virrandom.c \
- util/virresctrl.h util/virresctrl.c \
+ util/virresctrl.h util/virresctrl.c util/virresctrlpriv.h \
util/virrotatingfile.h util/virrotatingfile.c \
util/virscsi.c util/virscsi.h \
util/virscsihost.c util/virscsihost.h \
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 1a4f304f5e1f..a8009d913669 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -2548,6 +2548,17 @@ virRandomInt;
# util/virresctrl.h
virCacheTypeFromString;
virCacheTypeToString;
+virResctrlAllocAddPID;
+virResctrlAllocCreate;
+virResctrlAllocForeachSize;
+virResctrlAllocFormat;
+virResctrlAllocGetFree;
+virResctrlAllocNew;
+virResctrlAllocRemove;
+virResctrlAllocSetID;
+virResctrlAllocSetSize;
+virResctrlAllocUpdateMask;
+virResctrlAllocUpdateSize;
virResctrlGetInfo;
virResctrlInfoGetCache;
virResctrlInfoNew;
diff --git a/src/util/virresctrl.c b/src/util/virresctrl.c
index a681322905be..32ab84b6f121 100644
--- a/src/util/virresctrl.c
+++ b/src/util/virresctrl.c
@@ -18,8 +18,12 @@

#include <config.h>

-#include "virresctrl.h"
+#include <sys/file.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>

+#include "virresctrlpriv.h"
#include "c-ctype.h"
#include "count-one-bits.h"
#include "viralloc.h"
@@ -151,6 +155,156 @@ virResctrlInfoNew(void)
}


+/* Alloc-related definitions and AllocClass-related functions */
+typedef struct _virResctrlAllocPerType virResctrlAllocPerType;
+typedef virResctrlAllocPerType *virResctrlAllocPerTypePtr;
+struct _virResctrlAllocPerType {
+ /* There could be bool saying whether this is set or not, but since everything
+ * in virResctrlAlloc (and most of libvirt) goes with pointer arrays we would
+ * have to have one more level of allocation anyway, so this stays faithful to
+ * the concept */
+ unsigned long long **sizes;
+ size_t nsizes;
+
+ /* Mask for each cache */
+ virBitmapPtr *masks;
+ size_t nmasks;
+};
+
+typedef struct _virResctrlAllocPerLevel virResctrlAllocPerLevel;
+typedef virResctrlAllocPerLevel *virResctrlAllocPerLevelPtr;
+struct _virResctrlAllocPerLevel {
+ virResctrlAllocPerTypePtr *types; /* Indexed with enum virCacheType */
+};
+
+struct _virResctrlAlloc {
+ virObject parent;
+
+ virResctrlAllocPerLevelPtr *levels;
+ size_t nlevels;
+
+ char *id; /* The identifier (any unique string for now) */
+ char *path;
+};
+
+static virClassPtr virResctrlAllocClass;
+
+static void
+virResctrlAllocDispose(void *obj)
+{
+ size_t i = 0;
+ size_t j = 0;
+ size_t k = 0;
+
+ virResctrlAllocPtr resctrl = obj;
+
+ for (i = 0; i < resctrl->nlevels; i++) {
+ virResctrlAllocPerLevelPtr level = resctrl->levels[i];
+
+ if (!level)
+ continue;
+
+ for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
+ virResctrlAllocPerTypePtr type = level->types[j];
+
+ if (!type)
+ continue;
+
+ for (k = 0; k < type->nsizes; k++)
+ VIR_FREE(type->sizes[k]);
+
+ for (k = 0; k < type->nmasks; k++)
+ virBitmapFree(type->masks[k]);
+
+ VIR_FREE(type->sizes);
+ VIR_FREE(type->masks);
+ VIR_FREE(type);
+ }
+ VIR_FREE(level->types);
+ VIR_FREE(level);
+ }
+
+ VIR_FREE(resctrl->id);
+ VIR_FREE(resctrl->path);
+ VIR_FREE(resctrl->levels);
+}
+
+
+static int
+virResctrlAllocOnceInit(void)
+{
+ if (!(virResctrlAllocClass = virClassNew(virClassForObject(),
+ "virResctrlAlloc",
+ sizeof(virResctrlAlloc),
+ virResctrlAllocDispose)))
+ return -1;
+
+ return 0;
+}
+
+
+VIR_ONCE_GLOBAL_INIT(virResctrlAlloc)
+
+
+virResctrlAllocPtr
+virResctrlAllocNew(void)
+{
+ if (virResctrlAllocInitialize() < 0)
+ return NULL;
+
+ return virObjectNew(virResctrlAllocClass);
+}
+
+
+/* Common functions */
+static int
+virResctrlLockInternal(int op)
+{
+ int fd = open(SYSFS_RESCTRL_PATH, O_DIRECTORY | O_CLOEXEC);
+
+ if (fd < 0) {
+ virReportSystemError(errno, "%s", _("Cannot open resctrlfs"));
+ return -1;
+ }
+
+ if (flock(fd, op) < 0) {
+ virReportSystemError(errno, "%s", _("Cannot lock resctrlfs"));
+ VIR_FORCE_CLOSE(fd);
+ return -1;
+ }
+
+ return fd;
+}
+
+
+static inline int
+virResctrlLockWrite(void)
+{
+ return virResctrlLockInternal(LOCK_EX);
+}
+
+
+static int
+virResctrlUnlock(int fd)
+{
+ if (fd == -1)
+ return 0;
+
+ /* The lock gets unlocked by closing the fd, which we need to do anyway in
+ * order to clean up properly */
+ if (VIR_CLOSE(fd) < 0) {
+ virReportSystemError(errno, "%s", _("Cannot close resctrlfs"));
+
+ /* Trying to save the already broken */
+ if (flock(fd, LOCK_UN) < 0)
+ virReportSystemError(errno, "%s", _("Cannot unlock resctrlfs"));
+ return -1;
+ }
+
+ return 0;
+}
+
+
/* Info-related functions */
bool
virResctrlInfoIsEmpty(virResctrlInfoPtr resctrl)
@@ -354,3 +508,888 @@ virResctrlInfoGetCache(virResctrlInfoPtr resctrl,
VIR_FREE(*controls);
goto cleanup;
}
+
+
+/* Alloc-related functions */
+bool
+virResctrlAllocIsEmpty(virResctrlAllocPtr resctrl)
+{
+ size_t i = 0;
+ size_t j = 0;
+ size_t k = 0;
+
+ if (!resctrl)
+ return true;
+
+ for (i = 0; i < resctrl->nlevels; i++) {
+ virResctrlAllocPerLevelPtr a_level = resctrl->levels[i];
+
+ if (!a_level)
+ continue;
+
+ for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
+ virResctrlAllocPerTypePtr a_type = a_level->types[j];
+
+ if (!a_type)
+ continue;
+
+ for (k = 0; k < a_type->nsizes; k++) {
+ if (a_type->sizes[k])
+ return false;
+ }
+
+ for (k = 0; k < a_type->nmasks; k++) {
+ if (a_type->masks[k])
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+
+static virResctrlAllocPerTypePtr
+virResctrlAllocGetType(virResctrlAllocPtr resctrl,
+ unsigned int level,
+ virCacheType type)
+{
+ virResctrlAllocPerLevelPtr a_level = NULL;
+
+ if (resctrl->nlevels <= level &&
+ VIR_EXPAND_N(resctrl->levels, resctrl->nlevels, level - resctrl->nlevels + 1) < 0)
+ return NULL;
+
+ if (!resctrl->levels[level] &&
+ (VIR_ALLOC(resctrl->levels[level]) < 0 ||
+ VIR_ALLOC_N(resctrl->levels[level]->types, VIR_CACHE_TYPE_LAST) < 0))
+ return NULL;
+
+ a_level = resctrl->levels[level];
+
+ if (!a_level->types[type] && VIR_ALLOC(a_level->types[type]) < 0)
+ return NULL;
+
+ return a_level->types[type];
+}
+
+
+int
+virResctrlAllocUpdateMask(virResctrlAllocPtr resctrl,
+ unsigned int level,
+ virCacheType type,
+ unsigned int cache,
+ virBitmapPtr mask)
+{
+ virResctrlAllocPerTypePtr a_type = virResctrlAllocGetType(resctrl, level, type);
+
+ if (!a_type)
+ return -1;
+
+ if (a_type->nmasks <= cache &&
+ VIR_EXPAND_N(a_type->masks, a_type->nmasks,
+ cache - a_type->nmasks + 1) < 0)
+ return -1;
+
+ if (!a_type->masks[cache]) {
+ a_type->masks[cache] = virBitmapNew(virBitmapSize(mask));
+
+ if (!a_type->masks[cache])
+ return -1;
+ }
+
+ return virBitmapCopy(a_type->masks[cache], mask);
+}
+
+
+int
+virResctrlAllocUpdateSize(virResctrlAllocPtr resctrl,
+ unsigned int level,
+ virCacheType type,
+ unsigned int cache,
+ unsigned long long size)
+{
+ virResctrlAllocPerTypePtr a_type = virResctrlAllocGetType(resctrl, level, type);
+
+ if (!a_type)
+ return -1;
+
+ if (a_type->nsizes <= cache &&
+ VIR_EXPAND_N(a_type->sizes, a_type->nsizes,
+ cache - a_type->nsizes + 1) < 0)
+ return -1;
+
+ if (!a_type->sizes[cache] && VIR_ALLOC(a_type->sizes[cache]) < 0)
+ return -1;
+
+ *(a_type->sizes[cache]) = size;
+
+ return 0;
+}
+
+
+static bool
+virResctrlAllocCheckCollision(virResctrlAllocPtr a,
+ unsigned int level,
+ virCacheType type,
+ unsigned int cache)
+{
+ virResctrlAllocPerLevelPtr a_level = NULL;
+ virResctrlAllocPerTypePtr a_type = NULL;
+
+ if (!a)
+ return false;
+
+ if (a->nlevels <= level)
+ return false;
+
+ a_level = a->levels[level];
+
+ if (!a_level)
+ return false;
+
+ /* All types should be always allocated */
+ a_type = a_level->types[VIR_CACHE_TYPE_BOTH];
+
+ /* If there is an allocation for type 'both', there can be no other
+ * allocation for the same cache */
+ if (a_type && a_type->nsizes > cache && a_type->sizes[cache])
+ return true;
+
+ if (type == VIR_CACHE_TYPE_BOTH) {
+ a_type = a_level->types[VIR_CACHE_TYPE_CODE];
+
+ if (a_type && a_type->nsizes > cache && a_type->sizes[cache])
+ return true;
+
+ a_type = a_level->types[VIR_CACHE_TYPE_DATA];
+
+ if (a_type && a_type->nsizes > cache && a_type->sizes[cache])
+ return true;
+ } else {
+ a_type = a_level->types[type];
+
+ if (a_type && a_type->nsizes > cache && a_type->sizes[cache])
+ return true;
+ }
+
+ return false;
+}
+
+
+int
+virResctrlAllocSetSize(virResctrlAllocPtr resctrl,
+ unsigned int level,
+ virCacheType type,
+ unsigned int cache,
+ unsigned long long size)
+{
+ if (virResctrlAllocCheckCollision(resctrl, level, type, cache)) {
+ virReportError(VIR_ERR_XML_ERROR,
+ _("Colliding cache allocations for cache "
+ "level '%u' id '%u', type '%s'"),
+ level, cache, virCacheTypeToString(type));
+ return -1;
+ }
+
+ return virResctrlAllocUpdateSize(resctrl, level, type, cache, size);
+}
+
+
+int
+virResctrlAllocForeachSize(virResctrlAllocPtr resctrl,
+ virResctrlAllocForeachSizeCallback cb,
+ void *opaque)
+{
+ int ret = 0;
+ unsigned int level = 0;
+ unsigned int type = 0;
+ unsigned int cache = 0;
+
+ if (!resctrl)
+ return 0;
+
+ for (level = 0; level < resctrl->nlevels; level++) {
+ virResctrlAllocPerLevelPtr a_level = resctrl->levels[level];
+
+ if (!a_level)
+ continue;
+
+ for (type = 0; type < VIR_CACHE_TYPE_LAST; type++) {
+ virResctrlAllocPerTypePtr a_type = a_level->types[type];
+
+ if (!a_type)
+ continue;
+
+ for (cache = 0; cache < a_type->nsizes; cache++) {
+ unsigned long long *size = a_type->sizes[cache];
+
+ if (!size)
+ continue;
+
+ ret = cb(level, type, cache, *size, opaque);
+ if (ret < 0)
+ return ret;
+ }
+ }
+ }
+
+ return 0;
+}
+
+
+int
+virResctrlAllocSetID(virResctrlAllocPtr alloc,
+ const char *id)
+{
+ if (!id) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Resctrl allocation 'id' cannot be NULL"));
+ return -1;
+ }
+
+ return VIR_STRDUP(alloc->id, id);
+}
+
+
+char *
+virResctrlAllocFormat(virResctrlAllocPtr resctrl)
+{
+ virBuffer buf = VIR_BUFFER_INITIALIZER;
+ unsigned int level = 0;
+ unsigned int type = 0;
+ unsigned int cache = 0;
+
+ if (!resctrl)
+ return NULL;
+
+ for (level = 0; level < resctrl->nlevels; level++) {
+ virResctrlAllocPerLevelPtr a_level = resctrl->levels[level];
+
+ if (!a_level)
+ continue;
+
+ for (type = 0; type < VIR_CACHE_TYPE_LAST; type++) {
+ virResctrlAllocPerTypePtr a_type = a_level->types[type];
+
+ if (!a_type)
+ continue;
+
+ virBufferAsprintf(&buf, "L%u%s:", level, virResctrlTypeToString(type));
+
+ for (cache = 0; cache < a_type->nmasks; cache++) {
+ virBitmapPtr mask = a_type->masks[cache];
+ char *mask_str = NULL;
+
+ if (!mask)
+ continue;
+
+ mask_str = virBitmapToString(mask, false, true);
+ if (!mask_str) {
+ virBufferFreeAndReset(&buf);
+ return NULL;
+ }
+
+ virBufferAsprintf(&buf, "%u=%s;", cache, mask_str);
+ VIR_FREE(mask_str);
+ }
+
+ virBufferTrim(&buf, ";", 1);
+ virBufferAddChar(&buf, '\n');
+ }
+ }
+
+ virBufferCheckError(&buf);
+ return virBufferContentAndReset(&buf);
+}
+
+
+static int
+virResctrlAllocParseProcessCache(virResctrlAllocPtr resctrl,
+ unsigned int level,
+ virCacheType type,
+ char *cache)
+{
+ char *tmp = strchr(cache, '=');
+ unsigned int cache_id = 0;
+ virBitmapPtr mask = NULL;
+ int ret = -1;
+
+ if (!tmp)
+ return 0;
+
+ *tmp = '\0';
+ tmp++;
+
+ if (virStrToLong_uip(cache, NULL, 10, &cache_id) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Invalid cache id '%s'"), cache);
+ return -1;
+ }
+
+ mask = virBitmapNewString(tmp);
+ if (!mask)
+ return -1;
+
+ if (virResctrlAllocUpdateMask(resctrl, level, type, cache_id, mask) < 0)
+ goto cleanup;
+
+ ret = 0;
+ cleanup:
+ virBitmapFree(mask);
+ return ret;
+}
+
+
+static int
+virResctrlAllocParseProcessLine(virResctrlAllocPtr resctrl,
+ char *line)
+{
+ char **caches = NULL;
+ char *tmp = NULL;
+ unsigned int level = 0;
+ int type = -1;
+ size_t ncaches = 0;
+ size_t i = 0;
+ int ret = -1;
+
+ /* Skip lines that don't concern caches, e.g. MB: etc. */
+ if (line[0] != 'L')
+ return 0;
+
+ /* And lines that we can't parse too */
+ tmp = strchr(line, ':');
+ if (!tmp)
+ return 0;
+
+ *tmp = '\0';
+ tmp++;
+
+ if (virStrToLong_uip(line + 1, &line, 10, &level) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Cannot parse resctrl schema level '%s'"),
+ line + 1);
+ return -1;
+ }
+
+ type = virResctrlTypeFromString(line);
+ if (type < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Cannot parse resctrl schema level '%s'"),
+ line + 1);
+ return -1;
+ }
+
+ caches = virStringSplitCount(tmp, ";", 0, &ncaches);
+ if (!caches)
+ return 0;
+
+ for (i = 0; i < ncaches; i++) {
+ if (virResctrlAllocParseProcessCache(resctrl, level, type, caches[i]) < 0)
+ goto cleanup;
+ }
+
+ ret = 0;
+ cleanup:
+ virStringListFree(caches);
+ return ret;
+}
+
+
+static int
+virResctrlAllocParse(virResctrlAllocPtr alloc,
+ const char *schemata)
+{
+ char **lines = NULL;
+ size_t nlines = 0;
+ size_t i = 0;
+ int ret = -1;
+
+ lines = virStringSplitCount(schemata, "\n", 0, &nlines);
+ for (i = 0; i < nlines; i++) {
+ if (virResctrlAllocParseProcessLine(alloc, lines[i]) < 0)
+ goto cleanup;
+ }
+
+ ret = 0;
+ cleanup:
+ virStringListFree(lines);
+ return ret;
+}
+
+
+static void
+virResctrlAllocSubtractPerType(virResctrlAllocPerTypePtr a,
+ virResctrlAllocPerTypePtr b)
+{
+ size_t i = 0;
+
+ if (!a || !b)
+ return;
+
+ for (i = 0; i < a->nmasks && i < b->nmasks; ++i) {
+ if (a->masks[i] && b->masks[i])
+ virBitmapSubtract(a->masks[i], b->masks[i]);
+ }
+}
+
+
+static void
+virResctrlAllocSubtract(virResctrlAllocPtr a,
+ virResctrlAllocPtr b)
+{
+ size_t i = 0;
+ size_t j = 0;
+
+ if (!b)
+ return;
+
+ for (i = 0; i < a->nlevels && b->nlevels; ++i) {
+ if (a->levels[i] && b->levels[i]) {
+ /* Here we rely on all the system allocations to use the same types.
+ * We kind of _hope_ it's the case. If this is left here until the
+ * review and someone finds it, then suggest only removing this last
+ * sentence. */
+ for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
+ virResctrlAllocSubtractPerType(a->levels[i]->types[j],
+ b->levels[i]->types[j]);
+ }
+ }
+ }
+}
+
+
+static virResctrlAllocPtr
+virResctrlAllocNewFromInfo(virResctrlInfoPtr info)
+{
+ size_t i = 0;
+ size_t j = 0;
+ size_t k = 0;
+ virResctrlAllocPtr ret = virResctrlAllocNew();
+ virBitmapPtr mask = NULL;
+
+ if (!ret)
+ return NULL;
+
+ for (i = 0; i < info->nlevels; i++) {
+ virResctrlInfoPerLevelPtr i_level = info->levels[i];
+
+ if (!i_level)
+ continue;
+
+ for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
+ virResctrlInfoPerTypePtr i_type = i_level->types[j];
+
+ if (!i_type)
+ continue;
+
+ virBitmapFree(mask);
+ mask = virBitmapNew(i_type->bits);
+ if (!mask)
+ goto error;
+ virBitmapSetAll(mask);
+
+ for (k = 0; k <= i_type->max_cache_id; k++) {
+ if (virResctrlAllocUpdateMask(ret, i, j, k, mask) < 0)
+ goto error;
+ }
+ }
+ }
+
+ cleanup:
+ virBitmapFree(mask);
+ return ret;
+ error:
+ virObjectUnref(ret);
+ ret = NULL;
+ goto cleanup;
+}
+
+
+virResctrlAllocPtr
+virResctrlAllocGetFree(virResctrlInfoPtr resctrl)
+{
+ virResctrlAllocPtr ret = NULL;
+ virResctrlAllocPtr alloc = NULL;
+ virBitmapPtr mask = NULL;
+ struct dirent *ent = NULL;
+ DIR *dirp = NULL;
+ char *schemata = NULL;
+ int rv = -1;
+
+ if (virResctrlInfoIsEmpty(resctrl)) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+ _("Resource control is not supported on this host"));
+ return NULL;
+ }
+
+ ret = virResctrlAllocNewFromInfo(resctrl);
+ if (!ret)
+ return NULL;
+
+ if (virFileReadValueString(&schemata,
+ SYSFS_RESCTRL_PATH
+ "/schemata") < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Could not read schemata file for the default group"));
+ goto error;
+ }
+
+ alloc = virResctrlAllocNew();
+ if (!alloc)
+ goto error;
+
+ if (virResctrlAllocParse(alloc, schemata) < 0)
+ goto error;
+ if (virResctrlAllocIsEmpty(alloc)) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("No schemata for default resctrlfs group"));
+ goto error;
+ }
+ virResctrlAllocSubtract(ret, alloc);
+
+ if (virDirOpen(&dirp, SYSFS_RESCTRL_PATH) < 0)
+ goto error;
+
+ while ((rv = virDirRead(dirp, &ent, SYSFS_RESCTRL_PATH)) > 0) {
+ if (ent->d_type != DT_DIR)
+ continue;
+
+ if (STREQ(ent->d_name, "info"))
+ continue;
+
+ VIR_FREE(schemata);
+ if (virFileReadValueString(&schemata,
+ SYSFS_RESCTRL_PATH
+ "/%s/schemata",
+ ent->d_name) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Could not read schemata file for group %s"),
+ ent->d_name);
+ goto error;
+ }
+
+ virObjectUnref(alloc);
+ alloc = virResctrlAllocNew();
+ if (!alloc)
+ goto error;
+
+ if (virResctrlAllocParse(alloc, schemata) < 0)
+ goto error;
+
+ virResctrlAllocSubtract(ret, alloc);
+
+ VIR_FREE(schemata);
+ }
+ if (rv < 0)
+ goto error;
+
+ cleanup:
+ virObjectUnref(alloc);
+ VIR_DIR_CLOSE(dirp);
+ VIR_FREE(schemata);
+ virBitmapFree(mask);
+ return ret;
+
+ error:
+ virObjectUnref(ret);
+ ret = NULL;
+ goto cleanup;
+}
+
+
+static int
+virResctrlAllocMasksAssign(virResctrlInfoPtr r_info,
+ virResctrlAllocPtr alloc)
+{
+ int ret = -1;
+ unsigned int level = 0;
+ virResctrlAllocPtr alloc_free = NULL;
+
+ if (!r_info) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+ _("Resource control is not supported on this host"));
+ return -1;
+ }
+
+ alloc_free = virResctrlAllocGetFree(r_info);
+ if (!alloc_free)
+ return -1;
+
+ for (level = 0; level < alloc->nlevels; level++) {
+ virResctrlAllocPerLevelPtr a_level = alloc->levels[level];
+ virResctrlAllocPerLevelPtr f_level = NULL;
+ unsigned int type = 0;
+
+ if (!a_level)
+ continue;
+
+ if (level < alloc_free->nlevels)
+ f_level = alloc_free->levels[level];
+
+ if (!f_level) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Cache level %d does not support tuning"),
+ level);
+ goto cleanup;
+ }
+
+ for (type = 0; type < VIR_CACHE_TYPE_LAST; type++) {
+ virResctrlAllocPerTypePtr a_type = a_level->types[type];
+ virResctrlAllocPerTypePtr f_type = f_level->types[type];
+ unsigned int cache = 0;
+
+ if (!a_type)
+ continue;
+
+ if (!f_type) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Cache level %d does not support tuning for "
+ "scope type '%s'"),
+ level, virCacheTypeToString(type));
+ goto cleanup;
+ }
+
+ for (cache = 0; cache < a_type->nsizes; cache++) {
+ unsigned long long *size = a_type->sizes[cache];
+ virBitmapPtr a_mask = NULL;
+ virBitmapPtr f_mask = NULL;
+ virResctrlInfoPerLevelPtr i_level = r_info->levels[level];
+ virResctrlInfoPerTypePtr i_type = i_level->types[type];
+ unsigned long long granularity;
+ unsigned long long need_bits;
+ size_t i = 0;
+ ssize_t pos = -1;
+ ssize_t last_bits = 0;
+ ssize_t last_pos = -1;
+
+ if (!size)
+ continue;
+
+ if (cache >= f_type->nmasks) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Cache with id %u does not exists for level %d"),
+ cache, level);
+ goto cleanup;
+ }
+
+ f_mask = f_type->masks[cache];
+ if (!f_mask) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Cache level %d id %u does not support tuning for "
+ "scope type '%s'"),
+ level, cache, virCacheTypeToString(type));
+ goto cleanup;
+ }
+
+ granularity = i_type->size / i_type->bits;
+ need_bits = *size / granularity;
+
+ if (*size % granularity) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Cache allocation of size %llu is not "
+ "divisible by granularity %llu"),
+ *size, granularity);
+ goto cleanup;
+ }
+
+ if (need_bits < i_type->min_cbm_bits) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Cache allocation of size %llu is smaller "
+ "than the minimum allowed allocation %llu"),
+ *size, granularity * i_type->min_cbm_bits);
+ goto cleanup;
+ }
+
+ while ((pos = virBitmapNextSetBit(f_mask, pos)) >= 0) {
+ ssize_t pos_clear = virBitmapNextClearBit(f_mask, pos);
+ ssize_t bits;
+
+ if (pos_clear < 0)
+ pos_clear = virBitmapSize(f_mask);
+
+ bits = pos_clear - pos;
+
+ /* Not enough bits, move on and skip all of them */
+ if (bits < need_bits) {
+ pos = pos_clear;
+ continue;
+ }
+
+ /* This fits perfectly */
+ if (bits == need_bits) {
+ last_pos = pos;
+ break;
+ }
+
+ /* Remember the smaller region if we already found on before */
+ if (last_pos < 0 || (last_bits && bits < last_bits)) {
+ last_bits = bits;
+ last_pos = pos;
+ }
+
+ pos = pos_clear;
+ }
+
+ if (last_pos < 0) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Not enough room for allocation of "
+ "%llu bytes for level %u cache %u "
+ "scope type '%s'"),
+ *size, level, cache,
+ virCacheTypeToString(type));
+ goto cleanup;
+ }
+
+ a_mask = virBitmapNew(i_type->bits);
+ for (i = last_pos; i < last_pos + need_bits; i++) {
+ ignore_value(virBitmapSetBit(a_mask, i));
+ ignore_value(virBitmapClearBit(f_mask, i));
+ }
+
+ if (a_type->nmasks <= cache) {
+ if (VIR_EXPAND_N(a_type->masks, a_type->nmasks,
+ cache - a_type->nmasks + 1) < 0) {
+ virBitmapFree(a_mask);
+ goto cleanup;
+ }
+ }
+ a_type->masks[cache] = a_mask;
+ }
+ }
+ }
+
+ ret = 0;
+ cleanup:
+ virObjectUnref(alloc_free);
+ return ret;
+}
+
+
+/* This checks if the directory for the alloc exists. If not it tries to create
+ * it and apply appropriate alloc settings. */
+int
+virResctrlAllocCreate(virResctrlInfoPtr r_info,
+ virResctrlAllocPtr alloc,
+ const char *drivername,
+ const char *machinename)
+{
+ char *schemata_path = NULL;
+ char *alloc_str = NULL;
+ int ret = -1;
+ int lockfd = -1;
+
+ if (!alloc)
+ return 0;
+
+ if (!r_info) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+ _("Resource control is not supported on this host"));
+ return -1;
+ }
+
+ if (!alloc->path &&
+ virAsprintf(&alloc->path, "%s/%s-%s-%s",
+ SYSFS_RESCTRL_PATH, drivername, machinename, alloc->id) < 0)
+ return -1;
+
+ /* Check if this allocation was already created */
+ if (virFileIsDir(alloc->path))
+ return 0;
+
+ if (virFileExists(alloc->path)) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Path '%s' for resctrl allocation exists and is not a "
+ "directory"), alloc->path);
+ goto cleanup;
+ }
+
+ lockfd = virResctrlLockWrite();
+ if (lockfd < 0)
+ goto cleanup;
+
+ if (virResctrlAllocMasksAssign(r_info, alloc) < 0)
+ goto cleanup;
+
+ alloc_str = virResctrlAllocFormat(alloc);
+ if (!alloc_str)
+ goto cleanup;
+
+ if (virAsprintf(&schemata_path, "%s/schemata", alloc->path) < 0)
+ goto cleanup;
+
+ if (virFileMakePath(alloc->path) < 0) {
+ virReportSystemError(errno,
+ _("Cannot create resctrl directory '%s'"),
+ alloc->path);
+ goto cleanup;
+ }
+
+ if (virFileWriteStr(schemata_path, alloc_str, 0) < 0) {
+ rmdir(alloc->path);
+ virReportSystemError(errno,
+ _("Cannot write into schemata file '%s'"),
+ schemata_path);
+ goto cleanup;
+ }
+
+ ret = 0;
+ cleanup:
+ virResctrlUnlock(lockfd);
+ VIR_FREE(alloc_str);
+ VIR_FREE(schemata_path);
+ return ret;
+}
+
+
+int
+virResctrlAllocAddPID(virResctrlAllocPtr alloc,
+ pid_t pid)
+{
+ char *tasks = NULL;
+ char *pidstr = NULL;
+ int ret = 0;
+
+ if (!alloc->path) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Cannot add pid to non-existing resctrl allocation"));
+ return -1;
+ }
+
+ if (virAsprintf(&tasks, "%s/tasks", alloc->path) < 0)
+ return -1;
+
+ if (virAsprintf(&pidstr, "%lld", (long long int) pid) < 0)
+ goto cleanup;
+
+ if (virFileWriteStr(tasks, pidstr, 0) < 0) {
+ virReportSystemError(errno,
+ _("Cannot write pid in tasks file '%s'"),
+ tasks);
+ goto cleanup;
+ }
+
+ ret = 0;
+ cleanup:
+ VIR_FREE(tasks);
+ VIR_FREE(pidstr);
+ return ret;
+}
+
+
+int
+virResctrlAllocRemove(virResctrlAllocPtr alloc)
+{
+ int ret = 0;
+
+ if (!alloc->path)
+ return 0;
+
+ VIR_DEBUG("Removing resctrl allocation %s", alloc->path);
+ if (rmdir(alloc->path) != 0 && errno != ENOENT) {
+ ret = -errno;
+ VIR_ERROR(_("Unable to remove %s (%d)"), alloc->path, errno);
+ }
+
+ return ret;
+}
diff --git a/src/util/virresctrl.h b/src/util/virresctrl.h
index 471c02f28dcd..0fbd9dd3acfb 100644
--- a/src/util/virresctrl.h
+++ b/src/util/virresctrl.h
@@ -68,4 +68,66 @@ virResctrlInfoGetCache(virResctrlInfoPtr resctrl,
size_t *ncontrols,
virResctrlInfoPerCachePtr **controls);

+/* Alloc-related things */
+typedef struct _virResctrlAlloc virResctrlAlloc;
+typedef virResctrlAlloc *virResctrlAllocPtr;
+
+typedef int virResctrlAllocForeachSizeCallback(unsigned int level,
+ virCacheType type,
+ unsigned int cache,
+ unsigned long long size,
+ void *opaque);
+
+virResctrlAllocPtr
+virResctrlAllocNew(void);
+
+bool
+virResctrlAllocIsEmpty(virResctrlAllocPtr resctrl);
+
+int
+virResctrlAllocUpdateSize(virResctrlAllocPtr resctrl,
+ unsigned int level,
+ virCacheType type,
+ unsigned int cache,
+ unsigned long long size);
+
+int
+virResctrlAllocUpdateMask(virResctrlAllocPtr resctrl,
+ unsigned int level,
+ virCacheType type,
+ unsigned int cache,
+ virBitmapPtr mask);
+
+int
+virResctrlAllocSetSize(virResctrlAllocPtr resctrl,
+ unsigned int level,
+ virCacheType type,
+ unsigned int cache,
+ unsigned long long size);
+
+int
+virResctrlAllocForeachSize(virResctrlAllocPtr resctrl,
+ virResctrlAllocForeachSizeCallback cb,
+ void *opaque);
+
+int
+virResctrlAllocSetID(virResctrlAllocPtr alloc,
+ const char *id);
+
+char *
+virResctrlAllocFormat(virResctrlAllocPtr alloc);
+
+int
+virResctrlAllocCreate(virResctrlInfoPtr r_info,
+ virResctrlAllocPtr alloc,
+ const char *drivername,
+ const char *machinename);
+
+int
+virResctrlAllocAddPID(virResctrlAllocPtr alloc,
+ pid_t pid);
+
+int
+virResctrlAllocRemove(virResctrlAllocPtr alloc);
+
#endif /* __VIR_RESCTRL_H__ */
diff --git a/src/util/virresctrlpriv.h b/src/util/virresctrlpriv.h
new file mode 100644
index 000000000000..932ff7329c8c
--- /dev/null
+++ b/src/util/virresctrlpriv.h
@@ -0,0 +1,27 @@
+/*
+ * virresctrlpriv.h:
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __VIR_RESCTRL_PRIV_H__
+# define __VIR_RESCTRL_PRIV_H__
+
+# include "virresctrl.h"
+
+virResctrlAllocPtr
+virResctrlAllocGetFree(virResctrlInfoPtr resctrl);
+
+#endif /* __VIR_RESCTRL_PRIV_H__ */
--
2.15.1
John Ferlan
2018-01-03 21:05:38 UTC
Permalink
Post by Martin Kletzander
With this commit we finally have a way to read and manipulate basic resctrl
settings. Locking is done only on exposed functions that read/write from/to
resctrlfs. Not in functions that are exposed in virresctrlpriv.h as those are
only supposed to be used from tests.
---
src/Makefile.am | 2 +-
src/libvirt_private.syms | 11 +
src/util/virresctrl.c | 1041 ++++++++++++++++++++++++++++++++++++++++++++-
src/util/virresctrl.h | 62 +++
src/util/virresctrlpriv.h | 27 ++
5 files changed, 1141 insertions(+), 2 deletions(-)
create mode 100644 src/util/virresctrlpriv.h
Geez, as referenced by the cover letter, I'm glad this is the non way
too complicated parts of the code (tongue firmly planted in my cheek
with the obligatory eye roll emoji).

So, IIUC virResctrlInfoPtr is the "host" world and virResctrlAllocPtr is
the "guest" world, right? If so might be something to add to the commit
messages to make things just slightly more clear.

Lots of code here - hopefully another set of eyes can look at this too -
I'm sure I'll miss something as it's been very difficult to review this
in one (long) session...

Note to self, no sense in grep-ing for "alloc" or "free" any more after
this code is pushed as they're liberally used. Kind of funny to see
"alloc_free" in the same line ;-)

The other usage that was really confusing is @cache w/ @[n]masks and
@[n]sizes. It seems to be used as the array index for both and it's
never really crystal clear over the relationship between masks and sizes.
Post by Martin Kletzander
diff --git a/src/Makefile.am b/src/Makefile.am
index 4c022d1e44b9..9b4fd0c1d597 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -167,7 +167,7 @@ UTIL_SOURCES = \
util/virprocess.c util/virprocess.h \
util/virqemu.c util/virqemu.h \
util/virrandom.h util/virrandom.c \
- util/virresctrl.h util/virresctrl.c \
+ util/virresctrl.h util/virresctrl.c util/virresctrlpriv.h \
util/virrotatingfile.h util/virrotatingfile.c \
util/virscsi.c util/virscsi.h \
util/virscsihost.c util/virscsihost.h \
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 1a4f304f5e1f..a8009d913669 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -2548,6 +2548,17 @@ virRandomInt;
# util/virresctrl.h
virCacheTypeFromString;
virCacheTypeToString;
+virResctrlAllocAddPID;
+virResctrlAllocCreate;
+virResctrlAllocForeachSize;
+virResctrlAllocFormat;
+virResctrlAllocGetFree;
+virResctrlAllocNew;
+virResctrlAllocRemove;
+virResctrlAllocSetID;
+virResctrlAllocSetSize;
+virResctrlAllocUpdateMask;
+virResctrlAllocUpdateSize;
virResctrlGetInfo;
virResctrlInfoGetCache;
virResctrlInfoNew;
diff --git a/src/util/virresctrl.c b/src/util/virresctrl.c
index a681322905be..32ab84b6f121 100644
--- a/src/util/virresctrl.c
+++ b/src/util/virresctrl.c
@@ -18,8 +18,12 @@
#include <config.h>
-#include "virresctrl.h"
+#include <sys/file.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include "virresctrlpriv.h"
#include "c-ctype.h"
#include "count-one-bits.h"
#include "viralloc.h"
@@ -151,6 +155,156 @@ virResctrlInfoNew(void)
}
+/* Alloc-related definitions and AllocClass-related functions */
+typedef struct _virResctrlAllocPerType virResctrlAllocPerType;
+typedef virResctrlAllocPerType *virResctrlAllocPerTypePtr;
+struct _virResctrlAllocPerType {
+ /* There could be bool saying whether this is set or not, but since everything
+ * in virResctrlAlloc (and most of libvirt) goes with pointer arrays we would
+ * have to have one more level of allocation anyway, so this stays faithful to
+ * the concept */
+ unsigned long long **sizes;
+ size_t nsizes;
Perhaps something I should have noted in patch 2 - these @sizes are what
exactly? Is it a page size or just a "max size" in ?bytes for something
stored in the cache?
Post by Martin Kletzander
+
+ /* Mask for each cache */
+ virBitmapPtr *masks;
+ size_t nmasks;
And of course, what does the mask represent?

Just a quick comment would suffice - for the future readers as well.
Post by Martin Kletzander
+};
+
+typedef struct _virResctrlAllocPerLevel virResctrlAllocPerLevel;
+typedef virResctrlAllocPerLevel *virResctrlAllocPerLevelPtr;
+struct _virResctrlAllocPerLevel {
+ virResctrlAllocPerTypePtr *types; /* Indexed with enum virCacheType */
+};
+
+struct _virResctrlAlloc {
+ virObject parent;
+
+ virResctrlAllocPerLevelPtr *levels;
+ size_t nlevels;
These represent the "L#" for the directory, right? Examples would be
L1both, L2code, l3data, right?

Just trying to provide enough information so that some future reader
will have a chance to understand the design at it's simplist level.
Post by Martin Kletzander
+
+ char *id; /* The identifier (any unique string for now) */
we could call this uniqueId too, IDC really though. Just having "id"
makes it a bit difficult to search on (cacheUID also works).
Post by Martin Kletzander
+ char *path;
This is our generated path to the guest cache allocation data.
Post by Martin Kletzander
+};
+
+static virClassPtr virResctrlAllocClass;
+
+static void
+virResctrlAllocDispose(void *obj)
+{
+ size_t i = 0;
+ size_t j = 0;
+ size_t k = 0;
+
+ virResctrlAllocPtr resctrl = obj;
+
+ for (i = 0; i < resctrl->nlevels; i++) {
+ virResctrlAllocPerLevelPtr level = resctrl->levels[i];
+
+ if (!level)
+ continue;
Looking at virResctrlAllocGetType, it's possible that ->levels is
allocated, but ->levels[level]->types is not, thus potentially causing
problems for the following loop.

I think a " || !level->types" above would do the trick.
Post by Martin Kletzander
+
+ for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
+ virResctrlAllocPerTypePtr type = level->types[j];
+
+ if (!type)
+ continue;
+
+ for (k = 0; k < type->nsizes; k++)
+ VIR_FREE(type->sizes[k]);
+
+ for (k = 0; k < type->nmasks; k++)
+ virBitmapFree(type->masks[k]);
+
+ VIR_FREE(type->sizes);
+ VIR_FREE(type->masks);
+ VIR_FREE(type);
+ }
+ VIR_FREE(level->types);
+ VIR_FREE(level);
+ }
+
+ VIR_FREE(resctrl->id);
+ VIR_FREE(resctrl->path);
+ VIR_FREE(resctrl->levels);
+}
+
+
+static int
+virResctrlAllocOnceInit(void)
+{
+ if (!(virResctrlAllocClass = virClassNew(virClassForObject(),
+ "virResctrlAlloc",
+ sizeof(virResctrlAlloc),
+ virResctrlAllocDispose)))
+ return -1;
+
+ return 0;
+}
+
+
+VIR_ONCE_GLOBAL_INIT(virResctrlAlloc)
+
+
+virResctrlAllocPtr
+virResctrlAllocNew(void)
+{
+ if (virResctrlAllocInitialize() < 0)
+ return NULL;
+
+ return virObjectNew(virResctrlAllocClass);
+}
+
+
+/* Common functions */
+static int
+virResctrlLockInternal(int op)
+{
+ int fd = open(SYSFS_RESCTRL_PATH, O_DIRECTORY | O_CLOEXEC);
I started reviewing bottom up and wondered later on - do we really want
to be writing into /sys/fs/resctrl? See virResctrlAllocCreate comments.
Post by Martin Kletzander
+
+ if (fd < 0) {
+ virReportSystemError(errno, "%s", _("Cannot open resctrlfs"));
+ return -1;
+ }
+
+ if (flock(fd, op) < 0) {
+ virReportSystemError(errno, "%s", _("Cannot lock resctrlfs"));
+ VIR_FORCE_CLOSE(fd);
+ return -1;
+ }
+
+ return fd;
+}
+
+
+static inline int
+virResctrlLockWrite(void)
+{
+ return virResctrlLockInternal(LOCK_EX);
+}
+
+
+static int
+virResctrlUnlock(int fd)
+{
+ if (fd == -1)
+ return 0;
+
+ /* The lock gets unlocked by closing the fd, which we need to do anyway in
+ * order to clean up properly */
+ if (VIR_CLOSE(fd) < 0) {
+ virReportSystemError(errno, "%s", _("Cannot close resctrlfs"));
+
+ /* Trying to save the already broken */
+ if (flock(fd, LOCK_UN) < 0)
+ virReportSystemError(errno, "%s", _("Cannot unlock resctrlfs"));
+ return -1;
+ }
+
+ return 0;
+}
+
+
/* Info-related functions */
bool
virResctrlInfoIsEmpty(virResctrlInfoPtr resctrl)
@@ -354,3 +508,888 @@ virResctrlInfoGetCache(virResctrlInfoPtr resctrl,
VIR_FREE(*controls);
goto cleanup;
}
+
+
+/* Alloc-related functions */
+bool
+virResctrlAllocIsEmpty(virResctrlAllocPtr resctrl)
+{
+ size_t i = 0;
+ size_t j = 0;
+ size_t k = 0;
+
+ if (!resctrl)
+ return true;
+
Rather than numerous loops - would it perhaps be better to have a single
boolean field that gets set whenever a "sizes" or "masks" entry is set?
Of course this would all need to be locked, right?
Post by Martin Kletzander
+ for (i = 0; i < resctrl->nlevels; i++) {
+ virResctrlAllocPerLevelPtr a_level = resctrl->levels[i];
+
+ if (!a_level)
+ continue;
+
+ for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
+ virResctrlAllocPerTypePtr a_type = a_level->types[j];
+
+ if (!a_type)
+ continue;
+
+ for (k = 0; k < a_type->nsizes; k++) {
+ if (a_type->sizes[k])
+ return false;
+ }
+
+ for (k = 0; k < a_type->nmasks; k++) {
+ if (a_type->masks[k])
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+
+static virResctrlAllocPerTypePtr
+virResctrlAllocGetType(virResctrlAllocPtr resctrl,
+ unsigned int level,
+ virCacheType type)
+{
+ virResctrlAllocPerLevelPtr a_level = NULL;
+
+ if (resctrl->nlevels <= level &&
+ VIR_EXPAND_N(resctrl->levels, resctrl->nlevels, level - resctrl->nlevels + 1) < 0)
+ return NULL;
+
+ if (!resctrl->levels[level] &&
+ (VIR_ALLOC(resctrl->levels[level]) < 0 ||
+ VIR_ALLOC_N(resctrl->levels[level]->types, VIR_CACHE_TYPE_LAST) < 0))
+ return NULL;
Could have a problem if ->levels[level] ALLOC succeeds, but
->levels[level]->types ALLOC_N fails vis-a-vis the assumption in Dispose.
Post by Martin Kletzander
+
+ a_level = resctrl->levels[level];
+
+ if (!a_level->types[type] && VIR_ALLOC(a_level->types[type]) < 0)
+ return NULL;
+
+ return a_level->types[type];
+}
+
+
+int
+virResctrlAllocUpdateMask(virResctrlAllocPtr resctrl,
+ unsigned int level,
+ virCacheType type,
+ unsigned int cache,
+ virBitmapPtr mask)
So far, only ever called from virresctrl.c, so can this be static? Or
are there future plans?
Post by Martin Kletzander
+{
+ virResctrlAllocPerTypePtr a_type = virResctrlAllocGetType(resctrl, level, type);
+
+ if (!a_type)
+ return -1;
+
+ if (a_type->nmasks <= cache &&
+ VIR_EXPAND_N(a_type->masks, a_type->nmasks,
+ cache - a_type->nmasks + 1) < 0)
+ return -1;
+
+ if (!a_type->masks[cache]) {
+ a_type->masks[cache] = virBitmapNew(virBitmapSize(mask));
+
+ if (!a_type->masks[cache])
+ return -1;
+ }
Since this is an Update function, is it possible that the incoming mask
is a different size than what was already existing? IOW: In an else
condition, do we need to compare the size of the source and destination
before the Copy? And if so, Free the current and allocate one using the
new size...
Post by Martin Kletzander
+
+ return virBitmapCopy(a_type->masks[cache], mask);
In order to speed up IsEmpty we could set a boolean indicating the
change to resctrl
Post by Martin Kletzander
+}
+
+
+int
+virResctrlAllocUpdateSize(virResctrlAllocPtr resctrl,
+ unsigned int level,
+ virCacheType type,
+ unsigned int cache,
+ unsigned long long size)
Same here, only called from virresctrl.c
Post by Martin Kletzander
+{
+ virResctrlAllocPerTypePtr a_type = virResctrlAllocGetType(resctrl, level, type);
+
+ if (!a_type)
+ return -1;
+
+ if (a_type->nsizes <= cache &&
+ VIR_EXPAND_N(a_type->sizes, a_type->nsizes,
+ cache - a_type->nsizes + 1) < 0)
+ return -1;
+
+ if (!a_type->sizes[cache] && VIR_ALLOC(a_type->sizes[cache]) < 0)
+ return -1;
+
+ *(a_type->sizes[cache]) = size;
+
Same comment regarding IsEmpty and setting a boolean...
Post by Martin Kletzander
+ return 0;
+}
+
+
+static bool
+virResctrlAllocCheckCollision(virResctrlAllocPtr a,
+ unsigned int level,
+ virCacheType type,
+ unsigned int cache)
+{
+ virResctrlAllocPerLevelPtr a_level = NULL;
+ virResctrlAllocPerTypePtr a_type = NULL;
+
+ if (!a)
+ return false;
+
+ if (a->nlevels <= level)
+ return false;
+
+ a_level = a->levels[level];
+
+ if (!a_level)
+ return false;
+
+ /* All types should be always allocated */
+ a_type = a_level->types[VIR_CACHE_TYPE_BOTH];
+
+ /* If there is an allocation for type 'both', there can be no other
+ * allocation for the same cache */
+ if (a_type && a_type->nsizes > cache && a_type->sizes[cache])
+ return true;
+
+ if (type == VIR_CACHE_TYPE_BOTH) {
+ a_type = a_level->types[VIR_CACHE_TYPE_CODE];
+
+ if (a_type && a_type->nsizes > cache && a_type->sizes[cache])
+ return true;
+
+ a_type = a_level->types[VIR_CACHE_TYPE_DATA];
+
+ if (a_type && a_type->nsizes > cache && a_type->sizes[cache])
+ return true;
+ } else {
+ a_type = a_level->types[type];
+
+ if (a_type && a_type->nsizes > cache && a_type->sizes[cache])
+ return true;
+ }
This is simple yet confusing all in the same pile of code. I'm sure it
helps to have the overall design in mind though.

Would it even matter to check any of this if IsEmpty returns true?
Post by Martin Kletzander
+
+ return false;
+}
+
+
+int
+virResctrlAllocSetSize(virResctrlAllocPtr resctrl,
+ unsigned int level,
+ virCacheType type,
+ unsigned int cache,
+ unsigned long long size)
+{
+ if (virResctrlAllocCheckCollision(resctrl, level, type, cache)) {
+ virReportError(VIR_ERR_XML_ERROR,
+ _("Colliding cache allocations for cache "
+ "level '%u' id '%u', type '%s'"),
+ level, cache, virCacheTypeToString(type));
+ return -1;
+ }
+
+ return virResctrlAllocUpdateSize(resctrl, level, type, cache, size);
+}
+
+
+int
+virResctrlAllocForeachSize(virResctrlAllocPtr resctrl,
+ virResctrlAllocForeachSizeCallback cb,
+ void *opaque)
+{
+ int ret = 0;
+ unsigned int level = 0;
+ unsigned int type = 0;
+ unsigned int cache = 0;
+
+ if (!resctrl)
+ return 0;
Is it possible there are no sizes set?
Post by Martin Kletzander
+
+ for (level = 0; level < resctrl->nlevels; level++) {
+ virResctrlAllocPerLevelPtr a_level = resctrl->levels[level];
+
+ if (!a_level)
+ continue;
+
+ for (type = 0; type < VIR_CACHE_TYPE_LAST; type++) {
+ virResctrlAllocPerTypePtr a_type = a_level->types[type];
+
+ if (!a_type)
+ continue;
+
+ for (cache = 0; cache < a_type->nsizes; cache++) {
+ unsigned long long *size = a_type->sizes[cache];
+
+ if (!size)
+ continue;
+
+ ret = cb(level, type, cache, *size, opaque);
+ if (ret < 0)
+ return ret;
+ }
+ }
+ }
+
+ return 0;
+}
+
+
+int
+virResctrlAllocSetID(virResctrlAllocPtr alloc,
+ const char *id)
+{
+ if (!id) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Resctrl allocation 'id' cannot be NULL"));
+ return -1;
+ }
+
+ return VIR_STRDUP(alloc->id, id);
+}
+
+
Just a few words here about the expected format you'll be formatting and
parsing would be helpful. Still not 100% clear how [n]sizes comes into
play. When you're saving/re-reading I guess I would have thought (for
some reason) that perhaps sizes would be important
Post by Martin Kletzander
+char *
+virResctrlAllocFormat(virResctrlAllocPtr resctrl)
So far only ever called from virresctrl.c
Post by Martin Kletzander
+{
+ virBuffer buf = VIR_BUFFER_INITIALIZER;
+ unsigned int level = 0;
+ unsigned int type = 0;
+ unsigned int cache = 0;
+
+ if (!resctrl)
+ return NULL;
+
+ for (level = 0; level < resctrl->nlevels; level++) {
+ virResctrlAllocPerLevelPtr a_level = resctrl->levels[level];
+
+ if (!a_level)
+ continue;
+
+ for (type = 0; type < VIR_CACHE_TYPE_LAST; type++) {
+ virResctrlAllocPerTypePtr a_type = a_level->types[type];
+
+ if (!a_type)
+ continue;
+
+ virBufferAsprintf(&buf, "L%u%s:", level, virResctrlTypeToString(type));
+
+ for (cache = 0; cache < a_type->nmasks; cache++) {
+ virBitmapPtr mask = a_type->masks[cache];
+ char *mask_str = NULL;
+
+ if (!mask)
+ continue;
+
+ mask_str = virBitmapToString(mask, false, true);
+ if (!mask_str) {
+ virBufferFreeAndReset(&buf);
+ return NULL;
+ }
+
+ virBufferAsprintf(&buf, "%u=%s;", cache, mask_str);
+ VIR_FREE(mask_str);
+ }
+
+ virBufferTrim(&buf, ";", 1);
+ virBufferAddChar(&buf, '\n');
+ }
+ }
+
+ virBufferCheckError(&buf);
+ return virBufferContentAndReset(&buf);
Joy - some day it'd be nice to get back to the code that was going to
combine the CheckError and ContentAndReset... In the meantime, once this
is pushed I'll have another Coverity whine to filter (since CheckError
doesn't check status like 217 out of 220 calls do).
Post by Martin Kletzander
+}
+
+
+static int
+virResctrlAllocParseProcessCache(virResctrlAllocPtr resctrl,
+ unsigned int level,
+ virCacheType type,
+ char *cache)
+{
+ char *tmp = strchr(cache, '=');
+ unsigned int cache_id = 0;
+ virBitmapPtr mask = NULL;
+ int ret = -1;
+
+ if (!tmp)
+ return 0;
+
+ *tmp = '\0';
+ tmp++;
+
+ if (virStrToLong_uip(cache, NULL, 10, &cache_id) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Invalid cache id '%s'"), cache);
+ return -1;
+ }
+
+ mask = virBitmapNewString(tmp);
+ if (!mask)
+ return -1;
+
+ if (virResctrlAllocUpdateMask(resctrl, level, type, cache_id, mask) < 0)
+ goto cleanup;
+
+ ret = 0;
+ virBitmapFree(mask);
+ return ret;
+}
+
+
+static int
+virResctrlAllocParseProcessLine(virResctrlAllocPtr resctrl,
+ char *line)
+{
+ char **caches = NULL;
+ char *tmp = NULL;
+ unsigned int level = 0;
+ int type = -1;
+ size_t ncaches = 0;
+ size_t i = 0;
+ int ret = -1;
+
+ /* Skip lines that don't concern caches, e.g. MB: etc. */
+ if (line[0] != 'L')
+ return 0;
+
+ /* And lines that we can't parse too */
+ tmp = strchr(line, ':');
+ if (!tmp)
+ return 0;
+
+ *tmp = '\0';
+ tmp++;
+
+ if (virStrToLong_uip(line + 1, &line, 10, &level) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Cannot parse resctrl schema level '%s'"),
+ line + 1);
+ return -1;
+ }
+
+ type = virResctrlTypeFromString(line);
+ if (type < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Cannot parse resctrl schema level '%s'"),
+ line + 1);
+ return -1;
+ }
+
+ caches = virStringSplitCount(tmp, ";", 0, &ncaches);
+ if (!caches)
+ return 0;
+
+ for (i = 0; i < ncaches; i++) {
+ if (virResctrlAllocParseProcessCache(resctrl, level, type, caches[i]) < 0)
+ goto cleanup;
+ }
+
+ ret = 0;
+ virStringListFree(caches);
+ return ret;
+}
+
+
+static int
+virResctrlAllocParse(virResctrlAllocPtr alloc,
+ const char *schemata)
+{
+ char **lines = NULL;
+ size_t nlines = 0;
+ size_t i = 0;
+ int ret = -1;
+
+ lines = virStringSplitCount(schemata, "\n", 0, &nlines);
+ for (i = 0; i < nlines; i++) {
+ if (virResctrlAllocParseProcessLine(alloc, lines[i]) < 0)
+ goto cleanup;
+ }
+
+ ret = 0;
+ virStringListFree(lines);
+ return ret;
+}
+
+
+static void
+virResctrlAllocSubtractPerType(virResctrlAllocPerTypePtr a,
+ virResctrlAllocPerTypePtr b)
+{
+ size_t i = 0;
+
+ if (!a || !b)
+ return;
+
+ for (i = 0; i < a->nmasks && i < b->nmasks; ++i) {
+ if (a->masks[i] && b->masks[i])
+ virBitmapSubtract(a->masks[i], b->masks[i]);
+ }
Does [n]sizes affect anything? IOW: does it matter.. probably not, but
my brain is overloaded right now!
Post by Martin Kletzander
+}
+
+
+static void
+virResctrlAllocSubtract(virResctrlAllocPtr a,
+ virResctrlAllocPtr b)
perhaps easier to "think about" if using dst and src... here and above.
Post by Martin Kletzander
+{
+ size_t i = 0;
+ size_t j = 0;
+
+ if (!b)
+ return;
+
+ for (i = 0; i < a->nlevels && b->nlevels; ++i) {
b->nlevels is not a boolean or pointer... should be "i < " too right?

Any special reason to use ++i over i++?
Post by Martin Kletzander
+ if (a->levels[i] && b->levels[i]) {
+ /* Here we rely on all the system allocations to use the same types.
+ * We kind of _hope_ it's the case. If this is left here until the
+ * review and someone finds it, then suggest only removing this last
+ * sentence. */
Your golden egg has been discovered and I agree, how do we know? and of
course what role does CheckCollision (eventually) play?
Post by Martin Kletzander
+ for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
+ virResctrlAllocSubtractPerType(a->levels[i]->types[j],
+ b->levels[i]->types[j]);
+ }
+ }
+ }
+}
+
+
+static virResctrlAllocPtr
+virResctrlAllocNewFromInfo(virResctrlInfoPtr info)
+{
+ size_t i = 0;
+ size_t j = 0;
+ size_t k = 0;
+ virResctrlAllocPtr ret = virResctrlAllocNew();
+ virBitmapPtr mask = NULL;
+
+ if (!ret)
+ return NULL;
+
+ for (i = 0; i < info->nlevels; i++) {
+ virResctrlInfoPerLevelPtr i_level = info->levels[i];
+
+ if (!i_level)
+ continue;
+
+ for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
+ virResctrlInfoPerTypePtr i_type = i_level->types[j];
+
+ if (!i_type)
+ continue;
+
+ virBitmapFree(mask);
+ mask = virBitmapNew(i_type->bits);
+ if (!mask)
+ goto error;
+ virBitmapSetAll(mask);
+
+ for (k = 0; k <= i_type->max_cache_id; k++) {
+ if (virResctrlAllocUpdateMask(ret, i, j, k, mask) < 0)
+ goto error;
+ }
+ }
+ }
+
+ virBitmapFree(mask);
+ return ret;
+ virObjectUnref(ret);
+ ret = NULL;
+ goto cleanup;
+}
+
+
This needs a function header - args, what does it do, etc.
Post by Martin Kletzander
+virResctrlAllocPtr
+virResctrlAllocGetFree(virResctrlInfoPtr resctrl)
So far only ever called from virresctrl.c and from a static function, so
is it really necessary to be external?

It's also really "odd" to have Free in the name and this isn't
necessarily a Free type function - far from it! Perhaps Unused is closer
to reality with the usage of the Subtract calls. Of course that could
affect variable names selected already...
Post by Martin Kletzander
+{
+ virResctrlAllocPtr ret = NULL;
+ virResctrlAllocPtr alloc = NULL;
+ virBitmapPtr mask = NULL;
@mask is unused in this context except for the virBitmapFree
Post by Martin Kletzander
+ struct dirent *ent = NULL;
+ DIR *dirp = NULL;
+ char *schemata = NULL;
+ int rv = -1;
+
+ if (virResctrlInfoIsEmpty(resctrl)) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+ _("Resource control is not supported on this host"));
+ return NULL;
+ }
+
+ ret = virResctrlAllocNewFromInfo(resctrl);
+ if (!ret)
+ return NULL;
+
+ if (virFileReadValueString(&schemata,
+ SYSFS_RESCTRL_PATH
+ "/schemata") < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Could not read schemata file for the default group"));
+ goto error;
+ }
Shall I assume that schemata is a CAT specific file? So is "default
group" the right wording? Just seems strange - consistently used
though... If changed you'd have multiple changes. I guess something is
my mind is wanting to associate it with a cgroup.
Post by Martin Kletzander
+
+ alloc = virResctrlAllocNew();
+ if (!alloc)
+ goto error;
+
+ if (virResctrlAllocParse(alloc, schemata) < 0)
+ goto error;
+ if (virResctrlAllocIsEmpty(alloc)) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("No schemata for default resctrlfs group"));
+ goto error;
+ }
+ virResctrlAllocSubtract(ret, alloc);
+
+ if (virDirOpen(&dirp, SYSFS_RESCTRL_PATH) < 0)
+ goto error;
+
+ while ((rv = virDirRead(dirp, &ent, SYSFS_RESCTRL_PATH)) > 0) {
+ if (ent->d_type != DT_DIR)
+ continue;
+
+ if (STREQ(ent->d_name, "info"))
+ continue;
+
+ VIR_FREE(schemata);
+ if (virFileReadValueString(&schemata,
+ SYSFS_RESCTRL_PATH
+ "/%s/schemata",
+ ent->d_name) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Could not read schemata file for group %s"),
+ ent->d_name);
+ goto error;
+ }
+
+ virObjectUnref(alloc);
+ alloc = virResctrlAllocNew();
+ if (!alloc)
+ goto error;
+
+ if (virResctrlAllocParse(alloc, schemata) < 0)
+ goto error;
+
+ virResctrlAllocSubtract(ret, alloc);
+
+ VIR_FREE(schemata);
I think this one could be duplicitous
Post by Martin Kletzander
+ }
+ if (rv < 0)
+ goto error;
+
+ virObjectUnref(alloc);
+ VIR_DIR_CLOSE(dirp);
+ VIR_FREE(schemata);
+ virBitmapFree(mask);
+ return ret;
+
+ virObjectUnref(ret);
+ ret = NULL;
+ goto cleanup;
+}
+
+
Could use a short (haha) description for all the magic that happens here!
Post by Martin Kletzander
+static int
+virResctrlAllocMasksAssign(virResctrlInfoPtr r_info,
s/r_info/resctrl/ for consistency?
Post by Martin Kletzander
+ virResctrlAllocPtr alloc)
+{
+ int ret = -1;
+ unsigned int level = 0;
+ virResctrlAllocPtr alloc_free = NULL;
+
+ if (!r_info) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+ _("Resource control is not supported on this host"));
+ return -1;
+ }
Caller already checks this... and it's a static function...
Post by Martin Kletzander
+
+ alloc_free = virResctrlAllocGetFree(r_info);
+ if (!alloc_free)
+ return -1;
+
+ for (level = 0; level < alloc->nlevels; level++) {
+ virResctrlAllocPerLevelPtr a_level = alloc->levels[level];
+ virResctrlAllocPerLevelPtr f_level = NULL;
+ unsigned int type = 0;
+
+ if (!a_level)
+ continue;
+
+ if (level < alloc_free->nlevels)
+ f_level = alloc_free->levels[level];
+
+ if (!f_level) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Cache level %d does not support tuning"),
+ level);
+ goto cleanup;
+ }
+
+ for (type = 0; type < VIR_CACHE_TYPE_LAST; type++) {
+ virResctrlAllocPerTypePtr a_type = a_level->types[type];
+ virResctrlAllocPerTypePtr f_type = f_level->types[type];
+ unsigned int cache = 0;
+
+ if (!a_type)
+ continue;
+
+ if (!f_type) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Cache level %d does not support tuning for "
+ "scope type '%s'"),
+ level, virCacheTypeToString(type));
+ goto cleanup;
+ }
+
large wet piles of brain matter all over my keyboard for this following
loop. It is 100% not self documenting - I'm not sure WTF is going on.
Post by Martin Kletzander
+ for (cache = 0; cache < a_type->nsizes; cache++) {
+ unsigned long long *size = a_type->sizes[cache];
+ virBitmapPtr a_mask = NULL;
+ virBitmapPtr f_mask = NULL;
+ virResctrlInfoPerLevelPtr i_level = r_info->levels[level];
+ virResctrlInfoPerTypePtr i_type = i_level->types[type];
+ unsigned long long granularity;
+ unsigned long long need_bits;
+ size_t i = 0;
+ ssize_t pos = -1;
+ ssize_t last_bits = 0;
+ ssize_t last_pos = -1;
+
+ if (!size)
+ continue;
+
+ if (cache >= f_type->nmasks) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Cache with id %u does not exists for level %d"),
+ cache, level);
+ goto cleanup;
+ }
+
+ f_mask = f_type->masks[cache];
+ if (!f_mask) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Cache level %d id %u does not support tuning for "
+ "scope type '%s'"),
+ level, cache, virCacheTypeToString(type));
+ goto cleanup;
+ }
+
+ granularity = i_type->size / i_type->bits;
+ need_bits = *size / granularity;
+
+ if (*size % granularity) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Cache allocation of size %llu is not "
+ "divisible by granularity %llu"),
+ *size, granularity);
+ goto cleanup;
+ }
+
+ if (need_bits < i_type->min_cbm_bits) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Cache allocation of size %llu is smaller "
+ "than the minimum allowed allocation %llu"),
+ *size, granularity * i_type->min_cbm_bits);
+ goto cleanup;
+ }
+
+ while ((pos = virBitmapNextSetBit(f_mask, pos)) >= 0) {
+ ssize_t pos_clear = virBitmapNextClearBit(f_mask, pos);
+ ssize_t bits;
+
+ if (pos_clear < 0)
+ pos_clear = virBitmapSize(f_mask);
+
+ bits = pos_clear - pos;
+
+ /* Not enough bits, move on and skip all of them */
+ if (bits < need_bits) {
+ pos = pos_clear;
+ continue;
+ }
+
+ /* This fits perfectly */
+ if (bits == need_bits) {
+ last_pos = pos;
+ break;
+ }
+
+ /* Remember the smaller region if we already found on before */
+ if (last_pos < 0 || (last_bits && bits < last_bits)) {
+ last_bits = bits;
+ last_pos = pos;
+ }
+
+ pos = pos_clear;
+ }
+
+ if (last_pos < 0) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Not enough room for allocation of "
+ "%llu bytes for level %u cache %u "
+ "scope type '%s'"),
+ *size, level, cache,
+ virCacheTypeToString(type));
+ goto cleanup;
+ }
+
+ a_mask = virBitmapNew(i_type->bits);
+ for (i = last_pos; i < last_pos + need_bits; i++) {
+ ignore_value(virBitmapSetBit(a_mask, i));
+ ignore_value(virBitmapClearBit(f_mask, i));
+ }
+
+ if (a_type->nmasks <= cache) {
+ if (VIR_EXPAND_N(a_type->masks, a_type->nmasks,
+ cache - a_type->nmasks + 1) < 0) {
+ virBitmapFree(a_mask);
+ goto cleanup;
+ }
+ }
+ a_type->masks[cache] = a_mask;
+ }
+ }
+ }
+
+ ret = 0;
+ virObjectUnref(alloc_free);
+ return ret;
+}
+
+
+/* This checks if the directory for the alloc exists. If not it tries to create
+ * it and apply appropriate alloc settings. */
An overly simplified description for all that happens here.
Post by Martin Kletzander
+int
+virResctrlAllocCreate(virResctrlInfoPtr r_info,
s/r_info/resctrl/ for consistency?
Post by Martin Kletzander
+ virResctrlAllocPtr alloc,
+ const char *drivername,
+ const char *machinename)
+{
+ char *schemata_path = NULL;
+ char *alloc_str = NULL;
+ int ret = -1;
+ int lockfd = -1;
+
+ if (!alloc)
+ return 0;
+
+ if (!r_info) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+ _("Resource control is not supported on this host"));
+ return -1;
+ }
+
+ if (!alloc->path &&
+ virAsprintf(&alloc->path, "%s/%s-%s-%s",
+ SYSFS_RESCTRL_PATH, drivername, machinename, alloc->id) < 0)
+ return -1;
FWIW: From one of the .0 responses, this is where you get that
"qemu-qemu..." as @drivername is "qemu". I think the @drivername could
be superfluous, but it's not 100% clear what the options from the
systemd calls to build @machinename are...

In any case, if I'm reading things correctly it seems we would be saving
the guest data in the /sys/fs/resctrl path - is that the "right" place?
I can see for Info (e.g. host) level data that's read, sure, but for
guest (or Alloc) data that's written (and read) it's less clear in my
mind. Worried a bit about polluting /sys, some permissions issues
there, and a bit of future proofing - similar to cgroup layouts which
haven't remained constant through time. It would seem guests using this
would need to be sufficiently privileged or essentially not a session
guest...

Shouldn't things go in the guest libDir? IOW: vm->privateData->libDir
that could be passed from qemuProcessResctrlCreate and avoid replicating
the build-up logic here?

I would think avoiding /sys for guest data would probably be a good
idea. If the host dies how do we clean up after ourselves? At least if
we use guest directories, we can 'easily' document how to clean up.
Post by Martin Kletzander
+
+ /* Check if this allocation was already created */
+ if (virFileIsDir(alloc->path))
+ return 0;
+
+ if (virFileExists(alloc->path)) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Path '%s' for resctrl allocation exists and is not a "
+ "directory"), alloc->path);
+ goto cleanup;
+ }
+
+ lockfd = virResctrlLockWrite();
+ if (lockfd < 0)
+ goto cleanup;
+
+ if (virResctrlAllocMasksAssign(r_info, alloc) < 0)
+ goto cleanup;
+
+ alloc_str = virResctrlAllocFormat(alloc);
+ if (!alloc_str)
+ goto cleanup;
+
There is a *lot* of magic going on here regarding how the above calls
will end up generating alloc_str and then how this schemata file gets
populated with data that I really think is not self documenting.
Post by Martin Kletzander
+ if (virAsprintf(&schemata_path, "%s/schemata", alloc->path) < 0)
+ goto cleanup;
+
+ if (virFileMakePath(alloc->path) < 0) {
+ virReportSystemError(errno,
+ _("Cannot create resctrl directory '%s'"),
+ alloc->path);
+ goto cleanup;
+ }
+
+ if (virFileWriteStr(schemata_path, alloc_str, 0) < 0) {
+ rmdir(alloc->path);
open coded virResctrlAllocRemove... could go with virFileDeleteTree too
Post by Martin Kletzander
+ virReportSystemError(errno,
+ _("Cannot write into schemata file '%s'"),
+ schemata_path);
+ goto cleanup;
+ }
+
+ ret = 0;
+ virResctrlUnlock(lockfd);
+ VIR_FREE(alloc_str);
+ VIR_FREE(schemata_path);
+ return ret;
+}
+
+
Might be nice to describe what this is for... Why does this need to be
generated/saved?
Post by Martin Kletzander
+int
+virResctrlAllocAddPID(virResctrlAllocPtr alloc,
+ pid_t pid)
+{
+ char *tasks = NULL;
+ char *pidstr = NULL;
+ int ret = 0;
+
+ if (!alloc->path) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Cannot add pid to non-existing resctrl allocation"));
+ return -1;
+ }
+
Should we do any locking here?
Post by Martin Kletzander
+ if (virAsprintf(&tasks, "%s/tasks", alloc->path) < 0)
+ return -1;
+
+ if (virAsprintf(&pidstr, "%lld", (long long int) pid) < 0)
+ goto cleanup;
+
+ if (virFileWriteStr(tasks, pidstr, 0) < 0) {
+ virReportSystemError(errno,
+ _("Cannot write pid in tasks file '%s'"),
+ tasks);
+ goto cleanup;
+ }
We only ever write to this file a @pidstr, what happens when the vCPU is
unplugged? Can the cachetunes be updated in that manner?
Post by Martin Kletzander
+
+ ret = 0;
+ VIR_FREE(tasks);
+ VIR_FREE(pidstr);
+ return ret;
+}
+
+
+int
+virResctrlAllocRemove(virResctrlAllocPtr alloc)
+{
+ int ret = 0;
+
+ if (!alloc->path)
+ return 0;
+
+ VIR_DEBUG("Removing resctrl allocation %s", alloc->path);
+ if (rmdir(alloc->path) != 0 && errno != ENOENT) {
Is it only one level of directory or multiple? e.g. would
virFileDeleteTree be better? In fact overall it may be better.

I hope I found everything ;-)

John
Post by Martin Kletzander
+ ret = -errno;
+ VIR_ERROR(_("Unable to remove %s (%d)"), alloc->path, errno);
+ }
+
+ return ret;
+}
diff --git a/src/util/virresctrl.h b/src/util/virresctrl.h
index 471c02f28dcd..0fbd9dd3acfb 100644
--- a/src/util/virresctrl.h
+++ b/src/util/virresctrl.h
@@ -68,4 +68,66 @@ virResctrlInfoGetCache(virResctrlInfoPtr resctrl,
size_t *ncontrols,
virResctrlInfoPerCachePtr **controls);
+/* Alloc-related things */
+typedef struct _virResctrlAlloc virResctrlAlloc;
+typedef virResctrlAlloc *virResctrlAllocPtr;
+
+typedef int virResctrlAllocForeachSizeCallback(unsigned int level,
+ virCacheType type,
+ unsigned int cache,
+ unsigned long long size,
+ void *opaque);
+
+virResctrlAllocPtr
+virResctrlAllocNew(void);
+
+bool
+virResctrlAllocIsEmpty(virResctrlAllocPtr resctrl);
+
+int
+virResctrlAllocUpdateSize(virResctrlAllocPtr resctrl,
+ unsigned int level,
+ virCacheType type,
+ unsigned int cache,
+ unsigned long long size);
+
+int
+virResctrlAllocUpdateMask(virResctrlAllocPtr resctrl,
+ unsigned int level,
+ virCacheType type,
+ unsigned int cache,
+ virBitmapPtr mask);
+
+int
+virResctrlAllocSetSize(virResctrlAllocPtr resctrl,
+ unsigned int level,
+ virCacheType type,
+ unsigned int cache,
+ unsigned long long size);
+
+int
+virResctrlAllocForeachSize(virResctrlAllocPtr resctrl,
+ virResctrlAllocForeachSizeCallback cb,
+ void *opaque);
+
+int
+virResctrlAllocSetID(virResctrlAllocPtr alloc,
+ const char *id);
+
+char *
+virResctrlAllocFormat(virResctrlAllocPtr alloc);
+
+int
+virResctrlAllocCreate(virResctrlInfoPtr r_info,
+ virResctrlAllocPtr alloc,
+ const char *drivername,
+ const char *machinename);
+
+int
+virResctrlAllocAddPID(virResctrlAllocPtr alloc,
+ pid_t pid);
+
+int
+virResctrlAllocRemove(virResctrlAllocPtr alloc);
+
#endif /* __VIR_RESCTRL_H__ */
diff --git a/src/util/virresctrlpriv.h b/src/util/virresctrlpriv.h
new file mode 100644
index 000000000000..932ff7329c8c
--- /dev/null
+++ b/src/util/virresctrlpriv.h
@@ -0,0 +1,27 @@
+/*
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __VIR_RESCTRL_PRIV_H__
+# define __VIR_RESCTRL_PRIV_H__
+
+# include "virresctrl.h"
+
+virResctrlAllocPtr
+virResctrlAllocGetFree(virResctrlInfoPtr resctrl);
+
+#endif /* __VIR_RESCTRL_PRIV_H__ */
Pavel Hrdina
2018-01-11 14:58:42 UTC
Permalink
Post by John Ferlan
Post by Martin Kletzander
With this commit we finally have a way to read and manipulate basic resctrl
settings. Locking is done only on exposed functions that read/write from/to
resctrlfs. Not in functions that are exposed in virresctrlpriv.h as those are
only supposed to be used from tests.
---
src/Makefile.am | 2 +-
src/libvirt_private.syms | 11 +
src/util/virresctrl.c | 1041 ++++++++++++++++++++++++++++++++++++++++++++-
src/util/virresctrl.h | 62 +++
src/util/virresctrlpriv.h | 27 ++
5 files changed, 1141 insertions(+), 2 deletions(-)
create mode 100644 src/util/virresctrlpriv.h
Geez, as referenced by the cover letter, I'm glad this is the non way
too complicated parts of the code (tongue firmly planted in my cheek
with the obligatory eye roll emoji).
So, IIUC virResctrlInfoPtr is the "host" world and virResctrlAllocPtr is
the "guest" world, right? If so might be something to add to the commit
messages to make things just slightly more clear.
Lots of code here - hopefully another set of eyes can look at this too -
I'm sure I'll miss something as it's been very difficult to review this
in one (long) session...
Note to self, no sense in grep-ing for "alloc" or "free" any more after
this code is pushed as they're liberally used. Kind of funny to see
"alloc_free" in the same line ;-)
@[n]sizes. It seems to be used as the array index for both and it's
never really crystal clear over the relationship between masks and sizes.
Post by Martin Kletzander
diff --git a/src/Makefile.am b/src/Makefile.am
index 4c022d1e44b9..9b4fd0c1d597 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -167,7 +167,7 @@ UTIL_SOURCES = \
util/virprocess.c util/virprocess.h \
util/virqemu.c util/virqemu.h \
util/virrandom.h util/virrandom.c \
- util/virresctrl.h util/virresctrl.c \
+ util/virresctrl.h util/virresctrl.c util/virresctrlpriv.h \
util/virrotatingfile.h util/virrotatingfile.c \
util/virscsi.c util/virscsi.h \
util/virscsihost.c util/virscsihost.h \
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 1a4f304f5e1f..a8009d913669 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -2548,6 +2548,17 @@ virRandomInt;
# util/virresctrl.h
virCacheTypeFromString;
virCacheTypeToString;
+virResctrlAllocAddPID;
+virResctrlAllocCreate;
+virResctrlAllocForeachSize;
+virResctrlAllocFormat;
+virResctrlAllocGetFree;
+virResctrlAllocNew;
+virResctrlAllocRemove;
+virResctrlAllocSetID;
+virResctrlAllocSetSize;
+virResctrlAllocUpdateMask;
+virResctrlAllocUpdateSize;
virResctrlGetInfo;
virResctrlInfoGetCache;
virResctrlInfoNew;
diff --git a/src/util/virresctrl.c b/src/util/virresctrl.c
index a681322905be..32ab84b6f121 100644
--- a/src/util/virresctrl.c
+++ b/src/util/virresctrl.c
@@ -18,8 +18,12 @@
#include <config.h>
-#include "virresctrl.h"
+#include <sys/file.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include "virresctrlpriv.h"
#include "c-ctype.h"
#include "count-one-bits.h"
#include "viralloc.h"
@@ -151,6 +155,156 @@ virResctrlInfoNew(void)
}
+/* Alloc-related definitions and AllocClass-related functions */
+typedef struct _virResctrlAllocPerType virResctrlAllocPerType;
+typedef virResctrlAllocPerType *virResctrlAllocPerTypePtr;
+struct _virResctrlAllocPerType {
+ /* There could be bool saying whether this is set or not, but since everything
+ * in virResctrlAlloc (and most of libvirt) goes with pointer arrays we would
+ * have to have one more level of allocation anyway, so this stays faithful to
+ * the concept */
+ unsigned long long **sizes;
+ size_t nsizes;
exactly? Is it a page size or just a "max size" in ?bytes for something
stored in the cache?
It's an array that stores allocation size per CPU cache, since there
can be multiple CPUs in the host system.
Post by John Ferlan
Post by Martin Kletzander
+
+ /* Mask for each cache */
+ virBitmapPtr *masks;
+ size_t nmasks;
And of course, what does the mask represent?
The mask represents how the cache allocation is written into the
schemata file and it is based on the cache allocation size.
Let's say that you have L3 cache with size 15 MiB where granularity
of cache allocation is 768 KiB but minimal allocation is 1536 KiB.
This means that you have 20bit mask to configure the cache allocation.

So for example, if you would like to configure 3072 KiB of L3 cache
for a guest, the size would be stored in sizes. When the guest is
started, the following code tries to find a free allocation, that can
be used by the guest and if the allocation is possible it will store
the allocation mask for that specified CPU.

This would be the guest XML part:

<cachetune vcpus='0-3'>
<cache id='0' level='3' type='both' size='3' unit='MiB'/>
</cachetune>

And the resulting mask can be for example:

0x00F00

Which would lead into this schemata file:

L3:0=00F00
Post by John Ferlan
Just a quick comment would suffice - for the future readers as well.
Post by Martin Kletzander
+};
+
+typedef struct _virResctrlAllocPerLevel virResctrlAllocPerLevel;
+typedef virResctrlAllocPerLevel *virResctrlAllocPerLevelPtr;
+struct _virResctrlAllocPerLevel {
+ virResctrlAllocPerTypePtr *types; /* Indexed with enum virCacheType */
+};
+
+struct _virResctrlAlloc {
+ virObject parent;
+
+ virResctrlAllocPerLevelPtr *levels;
+ size_t nlevels;
These represent the "L#" for the directory, right? Examples would be
L1both, L2code, l3data, right?
Just trying to provide enough information so that some future reader
will have a chance to understand the design at it's simplist level.
Post by Martin Kletzander
+
+ char *id; /* The identifier (any unique string for now) */
we could call this uniqueId too, IDC really though. Just having "id"
makes it a bit difficult to search on (cacheUID also works).
Post by Martin Kletzander
+ char *path;
This is our generated path to the guest cache allocation data.
Post by Martin Kletzander
+};
+
+static virClassPtr virResctrlAllocClass;
+
+static void
+virResctrlAllocDispose(void *obj)
+{
+ size_t i = 0;
+ size_t j = 0;
+ size_t k = 0;
+
+ virResctrlAllocPtr resctrl = obj;
+
+ for (i = 0; i < resctrl->nlevels; i++) {
+ virResctrlAllocPerLevelPtr level = resctrl->levels[i];
+
+ if (!level)
+ continue;
Looking at virResctrlAllocGetType, it's possible that ->levels is
allocated, but ->levels[level]->types is not, thus potentially causing
problems for the following loop.
I think a " || !level->types" above would do the trick.
Nice catch, however I would rather refactor the virResctrlAllocGetType
function to always allocate it correctly or cleanup after itself it
the allocation fails.

For example instead of this code:

if (!resctrl->levels[level] &&
(VIR_ALLOC(resctrl->levels[level]) < 0 ||
VIR_ALLOC_N(resctrl->levels[level]->types, VIR_CACHE_TYPE_LAST) < 0))
return NULL;

we can have something like this:

if (!resctrl->levels[level]) {
virResctrlAllocPerTypePtr *types = NULL;

if (VIR_ALLOC_N(types, VIR_CACHE_TYPE_LAST) < 0)
return NULL;

if (VIR_ALLOC(resctrl->levels[level]) < 0) {
VIR_FREE(types);
return NULL;
}
resctrl->levels[level]->types = types;
}
Post by John Ferlan
Post by Martin Kletzander
+
+ for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
+ virResctrlAllocPerTypePtr type = level->types[j];
+
+ if (!type)
+ continue;
+
+ for (k = 0; k < type->nsizes; k++)
+ VIR_FREE(type->sizes[k]);
+
+ for (k = 0; k < type->nmasks; k++)
+ virBitmapFree(type->masks[k]);
+
+ VIR_FREE(type->sizes);
+ VIR_FREE(type->masks);
+ VIR_FREE(type);
+ }
+ VIR_FREE(level->types);
+ VIR_FREE(level);
+ }
+
+ VIR_FREE(resctrl->id);
+ VIR_FREE(resctrl->path);
+ VIR_FREE(resctrl->levels);
+}
+
+
+static int
+virResctrlAllocOnceInit(void)
+{
+ if (!(virResctrlAllocClass = virClassNew(virClassForObject(),
+ "virResctrlAlloc",
+ sizeof(virResctrlAlloc),
+ virResctrlAllocDispose)))
+ return -1;
+
+ return 0;
+}
+
+
+VIR_ONCE_GLOBAL_INIT(virResctrlAlloc)
+
+
+virResctrlAllocPtr
+virResctrlAllocNew(void)
+{
+ if (virResctrlAllocInitialize() < 0)
+ return NULL;
+
+ return virObjectNew(virResctrlAllocClass);
+}
+
+
+/* Common functions */
+static int
+virResctrlLockInternal(int op)
+{
+ int fd = open(SYSFS_RESCTRL_PATH, O_DIRECTORY | O_CLOEXEC);
I started reviewing bottom up and wondered later on - do we really want
to be writing into /sys/fs/resctrl? See virResctrlAllocCreate comments.
Well, we kind of need to write to /sys/fs/resctrl to configure the
cache allocation in the host for the guest that is about to start.
Post by John Ferlan
Post by Martin Kletzander
+
+ if (fd < 0) {
+ virReportSystemError(errno, "%s", _("Cannot open resctrlfs"));
+ return -1;
+ }
+
+ if (flock(fd, op) < 0) {
+ virReportSystemError(errno, "%s", _("Cannot lock resctrlfs"));
+ VIR_FORCE_CLOSE(fd);
+ return -1;
+ }
+
+ return fd;
+}
+
+
+static inline int
+virResctrlLockWrite(void)
+{
+ return virResctrlLockInternal(LOCK_EX);
+}
+
+
+static int
+virResctrlUnlock(int fd)
+{
+ if (fd == -1)
+ return 0;
+
+ /* The lock gets unlocked by closing the fd, which we need to do anyway in
+ * order to clean up properly */
+ if (VIR_CLOSE(fd) < 0) {
+ virReportSystemError(errno, "%s", _("Cannot close resctrlfs"));
+
+ /* Trying to save the already broken */
+ if (flock(fd, LOCK_UN) < 0)
+ virReportSystemError(errno, "%s", _("Cannot unlock resctrlfs"));
+ return -1;
+ }
+
+ return 0;
+}
+
+
/* Info-related functions */
bool
virResctrlInfoIsEmpty(virResctrlInfoPtr resctrl)
@@ -354,3 +508,888 @@ virResctrlInfoGetCache(virResctrlInfoPtr resctrl,
VIR_FREE(*controls);
goto cleanup;
}
+
+
+/* Alloc-related functions */
+bool
+virResctrlAllocIsEmpty(virResctrlAllocPtr resctrl)
+{
+ size_t i = 0;
+ size_t j = 0;
+ size_t k = 0;
+
+ if (!resctrl)
+ return true;
+
Rather than numerous loops - would it perhaps be better to have a single
boolean field that gets set whenever a "sizes" or "masks" entry is set?
Of course this would all need to be locked, right?
Post by Martin Kletzander
+ for (i = 0; i < resctrl->nlevels; i++) {
+ virResctrlAllocPerLevelPtr a_level = resctrl->levels[i];
+
+ if (!a_level)
+ continue;
+
+ for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
+ virResctrlAllocPerTypePtr a_type = a_level->types[j];
+
+ if (!a_type)
+ continue;
+
+ for (k = 0; k < a_type->nsizes; k++) {
+ if (a_type->sizes[k])
+ return false;
+ }
+
+ for (k = 0; k < a_type->nmasks; k++) {
+ if (a_type->masks[k])
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+
+static virResctrlAllocPerTypePtr
+virResctrlAllocGetType(virResctrlAllocPtr resctrl,
+ unsigned int level,
+ virCacheType type)
+{
+ virResctrlAllocPerLevelPtr a_level = NULL;
+
+ if (resctrl->nlevels <= level &&
+ VIR_EXPAND_N(resctrl->levels, resctrl->nlevels, level - resctrl->nlevels + 1) < 0)
+ return NULL;
+
+ if (!resctrl->levels[level] &&
+ (VIR_ALLOC(resctrl->levels[level]) < 0 ||
+ VIR_ALLOC_N(resctrl->levels[level]->types, VIR_CACHE_TYPE_LAST) < 0))
+ return NULL;
Could have a problem if ->levels[level] ALLOC succeeds, but
->levels[level]->types ALLOC_N fails vis-a-vis the assumption in Dispose.
Post by Martin Kletzander
+
+ a_level = resctrl->levels[level];
+
+ if (!a_level->types[type] && VIR_ALLOC(a_level->types[type]) < 0)
+ return NULL;
+
+ return a_level->types[type];
+}
+
+
+int
+virResctrlAllocUpdateMask(virResctrlAllocPtr resctrl,
+ unsigned int level,
+ virCacheType type,
+ unsigned int cache,
+ virBitmapPtr mask)
So far, only ever called from virresctrl.c, so can this be static? Or
are there future plans?
Post by Martin Kletzander
+{
+ virResctrlAllocPerTypePtr a_type = virResctrlAllocGetType(resctrl, level, type);
+
+ if (!a_type)
+ return -1;
+
+ if (a_type->nmasks <= cache &&
+ VIR_EXPAND_N(a_type->masks, a_type->nmasks,
+ cache - a_type->nmasks + 1) < 0)
+ return -1;
+
+ if (!a_type->masks[cache]) {
+ a_type->masks[cache] = virBitmapNew(virBitmapSize(mask));
+
+ if (!a_type->masks[cache])
+ return -1;
+ }
Since this is an Update function, is it possible that the incoming mask
is a different size than what was already existing? IOW: In an else
condition, do we need to compare the size of the source and destination
before the Copy? And if so, Free the current and allocate one using the
new size...
This shouldn't ever happen and if it does, something is seriously
wrong since the mask is stored in /sys/fs/resctrl and it should
already ensure that everything is correctly formatted.
Post by John Ferlan
Post by Martin Kletzander
+
+ return virBitmapCopy(a_type->masks[cache], mask);
In order to speed up IsEmpty we could set a boolean indicating the
change to resctrl
Post by Martin Kletzander
+}
+
+
+int
+virResctrlAllocUpdateSize(virResctrlAllocPtr resctrl,
+ unsigned int level,
+ virCacheType type,
+ unsigned int cache,
+ unsigned long long size)
Same here, only called from virresctrl.c
Post by Martin Kletzander
+{
+ virResctrlAllocPerTypePtr a_type = virResctrlAllocGetType(resctrl, level, type);
+
+ if (!a_type)
+ return -1;
+
+ if (a_type->nsizes <= cache &&
+ VIR_EXPAND_N(a_type->sizes, a_type->nsizes,
+ cache - a_type->nsizes + 1) < 0)
+ return -1;
+
+ if (!a_type->sizes[cache] && VIR_ALLOC(a_type->sizes[cache]) < 0)
+ return -1;
+
+ *(a_type->sizes[cache]) = size;
+
Same comment regarding IsEmpty and setting a boolean...
Post by Martin Kletzander
+ return 0;
+}
+
+
+static bool
+virResctrlAllocCheckCollision(virResctrlAllocPtr a,
+ unsigned int level,
+ virCacheType type,
+ unsigned int cache)
+{
+ virResctrlAllocPerLevelPtr a_level = NULL;
+ virResctrlAllocPerTypePtr a_type = NULL;
+
+ if (!a)
+ return false;
+
+ if (a->nlevels <= level)
+ return false;
+
+ a_level = a->levels[level];
+
+ if (!a_level)
+ return false;
+
+ /* All types should be always allocated */
+ a_type = a_level->types[VIR_CACHE_TYPE_BOTH];
+
+ /* If there is an allocation for type 'both', there can be no other
+ * allocation for the same cache */
+ if (a_type && a_type->nsizes > cache && a_type->sizes[cache])
+ return true;
+
+ if (type == VIR_CACHE_TYPE_BOTH) {
+ a_type = a_level->types[VIR_CACHE_TYPE_CODE];
+
+ if (a_type && a_type->nsizes > cache && a_type->sizes[cache])
+ return true;
+
+ a_type = a_level->types[VIR_CACHE_TYPE_DATA];
+
+ if (a_type && a_type->nsizes > cache && a_type->sizes[cache])
+ return true;
+ } else {
+ a_type = a_level->types[type];
+
+ if (a_type && a_type->nsizes > cache && a_type->sizes[cache])
+ return true;
+ }
This is simple yet confusing all in the same pile of code. I'm sure it
helps to have the overall design in mind though.
Would it even matter to check any of this if IsEmpty returns true?
Post by Martin Kletzander
+
+ return false;
+}
+
+
+int
+virResctrlAllocSetSize(virResctrlAllocPtr resctrl,
+ unsigned int level,
+ virCacheType type,
+ unsigned int cache,
+ unsigned long long size)
+{
+ if (virResctrlAllocCheckCollision(resctrl, level, type, cache)) {
+ virReportError(VIR_ERR_XML_ERROR,
+ _("Colliding cache allocations for cache "
+ "level '%u' id '%u', type '%s'"),
+ level, cache, virCacheTypeToString(type));
+ return -1;
+ }
+
+ return virResctrlAllocUpdateSize(resctrl, level, type, cache, size);
+}
+
+
+int
+virResctrlAllocForeachSize(virResctrlAllocPtr resctrl,
+ virResctrlAllocForeachSizeCallback cb,
+ void *opaque)
+{
+ int ret = 0;
+ unsigned int level = 0;
+ unsigned int type = 0;
+ unsigned int cache = 0;
+
+ if (!resctrl)
+ return 0;
Is it possible there are no sizes set?
Post by Martin Kletzander
+
+ for (level = 0; level < resctrl->nlevels; level++) {
+ virResctrlAllocPerLevelPtr a_level = resctrl->levels[level];
+
+ if (!a_level)
+ continue;
+
+ for (type = 0; type < VIR_CACHE_TYPE_LAST; type++) {
+ virResctrlAllocPerTypePtr a_type = a_level->types[type];
+
+ if (!a_type)
+ continue;
+
+ for (cache = 0; cache < a_type->nsizes; cache++) {
+ unsigned long long *size = a_type->sizes[cache];
+
+ if (!size)
+ continue;
+
+ ret = cb(level, type, cache, *size, opaque);
+ if (ret < 0)
+ return ret;
+ }
+ }
+ }
+
+ return 0;
+}
+
+
+int
+virResctrlAllocSetID(virResctrlAllocPtr alloc,
+ const char *id)
+{
+ if (!id) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Resctrl allocation 'id' cannot be NULL"));
+ return -1;
+ }
+
+ return VIR_STRDUP(alloc->id, id);
+}
+
+
Just a few words here about the expected format you'll be formatting and
parsing would be helpful. Still not 100% clear how [n]sizes comes into
play. When you're saving/re-reading I guess I would have thought (for
some reason) that perhaps sizes would be important
Post by Martin Kletzander
+char *
+virResctrlAllocFormat(virResctrlAllocPtr resctrl)
So far only ever called from virresctrl.c
Post by Martin Kletzander
+{
+ virBuffer buf = VIR_BUFFER_INITIALIZER;
+ unsigned int level = 0;
+ unsigned int type = 0;
+ unsigned int cache = 0;
+
+ if (!resctrl)
+ return NULL;
+
+ for (level = 0; level < resctrl->nlevels; level++) {
+ virResctrlAllocPerLevelPtr a_level = resctrl->levels[level];
+
+ if (!a_level)
+ continue;
+
+ for (type = 0; type < VIR_CACHE_TYPE_LAST; type++) {
+ virResctrlAllocPerTypePtr a_type = a_level->types[type];
+
+ if (!a_type)
+ continue;
+
+ virBufferAsprintf(&buf, "L%u%s:", level, virResctrlTypeToString(type));
+
+ for (cache = 0; cache < a_type->nmasks; cache++) {
+ virBitmapPtr mask = a_type->masks[cache];
+ char *mask_str = NULL;
+
+ if (!mask)
+ continue;
+
+ mask_str = virBitmapToString(mask, false, true);
+ if (!mask_str) {
+ virBufferFreeAndReset(&buf);
+ return NULL;
+ }
+
+ virBufferAsprintf(&buf, "%u=%s;", cache, mask_str);
+ VIR_FREE(mask_str);
+ }
+
+ virBufferTrim(&buf, ";", 1);
+ virBufferAddChar(&buf, '\n');
+ }
+ }
+
+ virBufferCheckError(&buf);
+ return virBufferContentAndReset(&buf);
Joy - some day it'd be nice to get back to the code that was going to
combine the CheckError and ContentAndReset... In the meantime, once this
is pushed I'll have another Coverity whine to filter (since CheckError
doesn't check status like 217 out of 220 calls do).
Post by Martin Kletzander
+}
+
+
+static int
+virResctrlAllocParseProcessCache(virResctrlAllocPtr resctrl,
+ unsigned int level,
+ virCacheType type,
+ char *cache)
+{
+ char *tmp = strchr(cache, '=');
+ unsigned int cache_id = 0;
+ virBitmapPtr mask = NULL;
+ int ret = -1;
+
+ if (!tmp)
+ return 0;
+
+ *tmp = '\0';
+ tmp++;
+
+ if (virStrToLong_uip(cache, NULL, 10, &cache_id) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Invalid cache id '%s'"), cache);
+ return -1;
+ }
+
+ mask = virBitmapNewString(tmp);
+ if (!mask)
+ return -1;
+
+ if (virResctrlAllocUpdateMask(resctrl, level, type, cache_id, mask) < 0)
+ goto cleanup;
+
+ ret = 0;
+ virBitmapFree(mask);
+ return ret;
+}
+
+
+static int
+virResctrlAllocParseProcessLine(virResctrlAllocPtr resctrl,
+ char *line)
+{
+ char **caches = NULL;
+ char *tmp = NULL;
+ unsigned int level = 0;
+ int type = -1;
+ size_t ncaches = 0;
+ size_t i = 0;
+ int ret = -1;
+
+ /* Skip lines that don't concern caches, e.g. MB: etc. */
+ if (line[0] != 'L')
+ return 0;
+
+ /* And lines that we can't parse too */
+ tmp = strchr(line, ':');
+ if (!tmp)
+ return 0;
+
+ *tmp = '\0';
+ tmp++;
+
+ if (virStrToLong_uip(line + 1, &line, 10, &level) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Cannot parse resctrl schema level '%s'"),
+ line + 1);
+ return -1;
+ }
+
+ type = virResctrlTypeFromString(line);
+ if (type < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Cannot parse resctrl schema level '%s'"),
+ line + 1);
+ return -1;
+ }
+
+ caches = virStringSplitCount(tmp, ";", 0, &ncaches);
+ if (!caches)
+ return 0;
+
+ for (i = 0; i < ncaches; i++) {
+ if (virResctrlAllocParseProcessCache(resctrl, level, type, caches[i]) < 0)
+ goto cleanup;
+ }
+
+ ret = 0;
+ virStringListFree(caches);
+ return ret;
+}
+
+
+static int
+virResctrlAllocParse(virResctrlAllocPtr alloc,
+ const char *schemata)
+{
+ char **lines = NULL;
+ size_t nlines = 0;
+ size_t i = 0;
+ int ret = -1;
+
+ lines = virStringSplitCount(schemata, "\n", 0, &nlines);
+ for (i = 0; i < nlines; i++) {
+ if (virResctrlAllocParseProcessLine(alloc, lines[i]) < 0)
+ goto cleanup;
+ }
+
+ ret = 0;
+ virStringListFree(lines);
+ return ret;
+}
+
+
+static void
+virResctrlAllocSubtractPerType(virResctrlAllocPerTypePtr a,
+ virResctrlAllocPerTypePtr b)
+{
+ size_t i = 0;
+
+ if (!a || !b)
+ return;
+
+ for (i = 0; i < a->nmasks && i < b->nmasks; ++i) {
+ if (a->masks[i] && b->masks[i])
+ virBitmapSubtract(a->masks[i], b->masks[i]);
+ }
Does [n]sizes affect anything? IOW: does it matter.. probably not, but
my brain is overloaded right now!
Post by Martin Kletzander
+}
+
+
+static void
+virResctrlAllocSubtract(virResctrlAllocPtr a,
+ virResctrlAllocPtr b)
perhaps easier to "think about" if using dst and src... here and above.
Post by Martin Kletzander
+{
+ size_t i = 0;
+ size_t j = 0;
+
+ if (!b)
+ return;
+
+ for (i = 0; i < a->nlevels && b->nlevels; ++i) {
b->nlevels is not a boolean or pointer... should be "i < " too right?
Any special reason to use ++i over i++?
Post by Martin Kletzander
+ if (a->levels[i] && b->levels[i]) {
+ /* Here we rely on all the system allocations to use the same types.
+ * We kind of _hope_ it's the case. If this is left here until the
+ * review and someone finds it, then suggest only removing this last
+ * sentence. */
Your golden egg has been discovered and I agree, how do we know? and of
course what role does CheckCollision (eventually) play?
Post by Martin Kletzander
+ for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
+ virResctrlAllocSubtractPerType(a->levels[i]->types[j],
+ b->levels[i]->types[j]);
+ }
+ }
+ }
+}
+
+
+static virResctrlAllocPtr
+virResctrlAllocNewFromInfo(virResctrlInfoPtr info)
+{
+ size_t i = 0;
+ size_t j = 0;
+ size_t k = 0;
+ virResctrlAllocPtr ret = virResctrlAllocNew();
+ virBitmapPtr mask = NULL;
+
+ if (!ret)
+ return NULL;
+
+ for (i = 0; i < info->nlevels; i++) {
+ virResctrlInfoPerLevelPtr i_level = info->levels[i];
+
+ if (!i_level)
+ continue;
+
+ for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
+ virResctrlInfoPerTypePtr i_type = i_level->types[j];
+
+ if (!i_type)
+ continue;
+
+ virBitmapFree(mask);
+ mask = virBitmapNew(i_type->bits);
+ if (!mask)
+ goto error;
+ virBitmapSetAll(mask);
+
+ for (k = 0; k <= i_type->max_cache_id; k++) {
+ if (virResctrlAllocUpdateMask(ret, i, j, k, mask) < 0)
+ goto error;
+ }
+ }
+ }
+
+ virBitmapFree(mask);
+ return ret;
+ virObjectUnref(ret);
+ ret = NULL;
+ goto cleanup;
+}
+
+
This needs a function header - args, what does it do, etc.
Post by Martin Kletzander
+virResctrlAllocPtr
+virResctrlAllocGetFree(virResctrlInfoPtr resctrl)
So far only ever called from virresctrl.c and from a static function, so
is it really necessary to be external?
It's also really "odd" to have Free in the name and this isn't
necessarily a Free type function - far from it! Perhaps Unused is closer
to reality with the usage of the Subtract calls. Of course that could
affect variable names selected already...
Post by Martin Kletzander
+{
+ virResctrlAllocPtr ret = NULL;
+ virResctrlAllocPtr alloc = NULL;
+ virBitmapPtr mask = NULL;
@mask is unused in this context except for the virBitmapFree
Post by Martin Kletzander
+ struct dirent *ent = NULL;
+ DIR *dirp = NULL;
+ char *schemata = NULL;
+ int rv = -1;
+
+ if (virResctrlInfoIsEmpty(resctrl)) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+ _("Resource control is not supported on this host"));
+ return NULL;
+ }
+
+ ret = virResctrlAllocNewFromInfo(resctrl);
+ if (!ret)
+ return NULL;
+
+ if (virFileReadValueString(&schemata,
+ SYSFS_RESCTRL_PATH
+ "/schemata") < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Could not read schemata file for the default group"));
+ goto error;
+ }
Shall I assume that schemata is a CAT specific file? So is "default
group" the right wording? Just seems strange - consistently used
though... If changed you'd have multiple changes. I guess something is
my mind is wanting to associate it with a cgroup.
Post by Martin Kletzander
+
+ alloc = virResctrlAllocNew();
+ if (!alloc)
+ goto error;
+
+ if (virResctrlAllocParse(alloc, schemata) < 0)
+ goto error;
+ if (virResctrlAllocIsEmpty(alloc)) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("No schemata for default resctrlfs group"));
+ goto error;
+ }
+ virResctrlAllocSubtract(ret, alloc);
+
+ if (virDirOpen(&dirp, SYSFS_RESCTRL_PATH) < 0)
+ goto error;
+
+ while ((rv = virDirRead(dirp, &ent, SYSFS_RESCTRL_PATH)) > 0) {
+ if (ent->d_type != DT_DIR)
+ continue;
+
+ if (STREQ(ent->d_name, "info"))
+ continue;
+
+ VIR_FREE(schemata);
+ if (virFileReadValueString(&schemata,
+ SYSFS_RESCTRL_PATH
+ "/%s/schemata",
+ ent->d_name) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Could not read schemata file for group %s"),
+ ent->d_name);
+ goto error;
+ }
+
+ virObjectUnref(alloc);
+ alloc = virResctrlAllocNew();
+ if (!alloc)
+ goto error;
+
+ if (virResctrlAllocParse(alloc, schemata) < 0)
+ goto error;
+
+ virResctrlAllocSubtract(ret, alloc);
+
+ VIR_FREE(schemata);
I think this one could be duplicitous
Post by Martin Kletzander
+ }
+ if (rv < 0)
+ goto error;
+
+ virObjectUnref(alloc);
+ VIR_DIR_CLOSE(dirp);
+ VIR_FREE(schemata);
+ virBitmapFree(mask);
+ return ret;
+
+ virObjectUnref(ret);
+ ret = NULL;
+ goto cleanup;
+}
+
+
Could use a short (haha) description for all the magic that happens here!
Post by Martin Kletzander
+static int
+virResctrlAllocMasksAssign(virResctrlInfoPtr r_info,
s/r_info/resctrl/ for consistency?
Post by Martin Kletzander
+ virResctrlAllocPtr alloc)
+{
+ int ret = -1;
+ unsigned int level = 0;
+ virResctrlAllocPtr alloc_free = NULL;
+
+ if (!r_info) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+ _("Resource control is not supported on this host"));
+ return -1;
+ }
Caller already checks this... and it's a static function...
Post by Martin Kletzander
+
+ alloc_free = virResctrlAllocGetFree(r_info);
+ if (!alloc_free)
+ return -1;
+
+ for (level = 0; level < alloc->nlevels; level++) {
+ virResctrlAllocPerLevelPtr a_level = alloc->levels[level];
+ virResctrlAllocPerLevelPtr f_level = NULL;
+ unsigned int type = 0;
+
+ if (!a_level)
+ continue;
+
+ if (level < alloc_free->nlevels)
+ f_level = alloc_free->levels[level];
+
+ if (!f_level) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Cache level %d does not support tuning"),
+ level);
+ goto cleanup;
+ }
+
+ for (type = 0; type < VIR_CACHE_TYPE_LAST; type++) {
+ virResctrlAllocPerTypePtr a_type = a_level->types[type];
+ virResctrlAllocPerTypePtr f_type = f_level->types[type];
+ unsigned int cache = 0;
+
+ if (!a_type)
+ continue;
+
+ if (!f_type) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Cache level %d does not support tuning for "
+ "scope type '%s'"),
+ level, virCacheTypeToString(type));
+ goto cleanup;
+ }
+
large wet piles of brain matter all over my keyboard for this following
loop. It is 100% not self documenting - I'm not sure WTF is going on.
I have to agree that following this code is extremely hard,
perhaps splitting it into several functions would help to review
this madness :).
Post by John Ferlan
Post by Martin Kletzander
+ for (cache = 0; cache < a_type->nsizes; cache++) {
+ unsigned long long *size = a_type->sizes[cache];
+ virBitmapPtr a_mask = NULL;
+ virBitmapPtr f_mask = NULL;
+ virResctrlInfoPerLevelPtr i_level = r_info->levels[level];
+ virResctrlInfoPerTypePtr i_type = i_level->types[type];
+ unsigned long long granularity;
+ unsigned long long need_bits;
+ size_t i = 0;
+ ssize_t pos = -1;
+ ssize_t last_bits = 0;
+ ssize_t last_pos = -1;
+
+ if (!size)
+ continue;
+
+ if (cache >= f_type->nmasks) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Cache with id %u does not exists for level %d"),
+ cache, level);
+ goto cleanup;
+ }
+
+ f_mask = f_type->masks[cache];
+ if (!f_mask) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Cache level %d id %u does not support tuning for "
+ "scope type '%s'"),
+ level, cache, virCacheTypeToString(type));
+ goto cleanup;
+ }
+
+ granularity = i_type->size / i_type->bits;
+ need_bits = *size / granularity;
+
+ if (*size % granularity) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Cache allocation of size %llu is not "
+ "divisible by granularity %llu"),
+ *size, granularity);
+ goto cleanup;
+ }
+
+ if (need_bits < i_type->min_cbm_bits) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Cache allocation of size %llu is smaller "
+ "than the minimum allowed allocation %llu"),
+ *size, granularity * i_type->min_cbm_bits);
+ goto cleanup;
+ }
+
+ while ((pos = virBitmapNextSetBit(f_mask, pos)) >= 0) {
+ ssize_t pos_clear = virBitmapNextClearBit(f_mask, pos);
+ ssize_t bits;
+
+ if (pos_clear < 0)
+ pos_clear = virBitmapSize(f_mask);
+
+ bits = pos_clear - pos;
+
+ /* Not enough bits, move on and skip all of them */
+ if (bits < need_bits) {
+ pos = pos_clear;
+ continue;
+ }
+
+ /* This fits perfectly */
+ if (bits == need_bits) {
+ last_pos = pos;
+ break;
+ }
+
+ /* Remember the smaller region if we already found on before */
+ if (last_pos < 0 || (last_bits && bits < last_bits)) {
+ last_bits = bits;
+ last_pos = pos;
+ }
+
+ pos = pos_clear;
+ }
+
+ if (last_pos < 0) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Not enough room for allocation of "
+ "%llu bytes for level %u cache %u "
+ "scope type '%s'"),
+ *size, level, cache,
+ virCacheTypeToString(type));
+ goto cleanup;
+ }
+
+ a_mask = virBitmapNew(i_type->bits);
+ for (i = last_pos; i < last_pos + need_bits; i++) {
+ ignore_value(virBitmapSetBit(a_mask, i));
+ ignore_value(virBitmapClearBit(f_mask, i));
+ }
+
+ if (a_type->nmasks <= cache) {
+ if (VIR_EXPAND_N(a_type->masks, a_type->nmasks,
+ cache - a_type->nmasks + 1) < 0) {
+ virBitmapFree(a_mask);
+ goto cleanup;
+ }
+ }
+ a_type->masks[cache] = a_mask;
+ }
+ }
+ }
+
+ ret = 0;
+ virObjectUnref(alloc_free);
+ return ret;
+}
+
+
+/* This checks if the directory for the alloc exists. If not it tries to create
+ * it and apply appropriate alloc settings. */
An overly simplified description for all that happens here.
Post by Martin Kletzander
+int
+virResctrlAllocCreate(virResctrlInfoPtr r_info,
s/r_info/resctrl/ for consistency?
Post by Martin Kletzander
+ virResctrlAllocPtr alloc,
+ const char *drivername,
+ const char *machinename)
+{
+ char *schemata_path = NULL;
+ char *alloc_str = NULL;
+ int ret = -1;
+ int lockfd = -1;
+
+ if (!alloc)
+ return 0;
+
+ if (!r_info) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+ _("Resource control is not supported on this host"));
+ return -1;
+ }
+
+ if (!alloc->path &&
+ virAsprintf(&alloc->path, "%s/%s-%s-%s",
+ SYSFS_RESCTRL_PATH, drivername, machinename, alloc->id) < 0)
+ return -1;
FWIW: From one of the .0 responses, this is where you get that
be superfluous, but it's not 100% clear what the options from the
In any case, if I'm reading things correctly it seems we would be saving
the guest data in the /sys/fs/resctrl path - is that the "right" place?
I can see for Info (e.g. host) level data that's read, sure, but for
guest (or Alloc) data that's written (and read) it's less clear in my
mind. Worried a bit about polluting /sys, some permissions issues
there, and a bit of future proofing - similar to cgroup layouts which
haven't remained constant through time. It would seem guests using this
would need to be sufficiently privileged or essentially not a session
guest...
Shouldn't things go in the guest libDir? IOW: vm->privateData->libDir
that could be passed from qemuProcessResctrlCreate and avoid replicating
the build-up logic here?
I would think avoiding /sys for guest data would probably be a good
idea. If the host dies how do we clean up after ourselves? At least if
we use guest directories, we can 'easily' document how to clean up.
As I've wrote, we need to write to /sys/fs/resctrl in order to apply
the cache allocation for a guest. This is not saving the "guest data"
anywhere, that's already done by having it configured in the guest XML.

I was able to find some documentation:

<https://github.com/intel/intel-cmt-cat/wiki/resctrl>

Pavel
Post by John Ferlan
Post by Martin Kletzander
+
+ /* Check if this allocation was already created */
+ if (virFileIsDir(alloc->path))
+ return 0;
+
+ if (virFileExists(alloc->path)) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Path '%s' for resctrl allocation exists and is not a "
+ "directory"), alloc->path);
+ goto cleanup;
+ }
+
+ lockfd = virResctrlLockWrite();
+ if (lockfd < 0)
+ goto cleanup;
+
+ if (virResctrlAllocMasksAssign(r_info, alloc) < 0)
+ goto cleanup;
+
+ alloc_str = virResctrlAllocFormat(alloc);
+ if (!alloc_str)
+ goto cleanup;
+
There is a *lot* of magic going on here regarding how the above calls
will end up generating alloc_str and then how this schemata file gets
populated with data that I really think is not self documenting.
Post by Martin Kletzander
+ if (virAsprintf(&schemata_path, "%s/schemata", alloc->path) < 0)
+ goto cleanup;
+
+ if (virFileMakePath(alloc->path) < 0) {
+ virReportSystemError(errno,
+ _("Cannot create resctrl directory '%s'"),
+ alloc->path);
+ goto cleanup;
+ }
+
+ if (virFileWriteStr(schemata_path, alloc_str, 0) < 0) {
+ rmdir(alloc->path);
open coded virResctrlAllocRemove... could go with virFileDeleteTree too
Post by Martin Kletzander
+ virReportSystemError(errno,
+ _("Cannot write into schemata file '%s'"),
+ schemata_path);
+ goto cleanup;
+ }
+
+ ret = 0;
+ virResctrlUnlock(lockfd);
+ VIR_FREE(alloc_str);
+ VIR_FREE(schemata_path);
+ return ret;
+}
+
+
Might be nice to describe what this is for... Why does this need to be
generated/saved?
Post by Martin Kletzander
+int
+virResctrlAllocAddPID(virResctrlAllocPtr alloc,
+ pid_t pid)
+{
+ char *tasks = NULL;
+ char *pidstr = NULL;
+ int ret = 0;
+
+ if (!alloc->path) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Cannot add pid to non-existing resctrl allocation"));
+ return -1;
+ }
+
Should we do any locking here?
Post by Martin Kletzander
+ if (virAsprintf(&tasks, "%s/tasks", alloc->path) < 0)
+ return -1;
+
+ if (virAsprintf(&pidstr, "%lld", (long long int) pid) < 0)
+ goto cleanup;
+
+ if (virFileWriteStr(tasks, pidstr, 0) < 0) {
+ virReportSystemError(errno,
+ _("Cannot write pid in tasks file '%s'"),
+ tasks);
+ goto cleanup;
+ }
unplugged? Can the cachetunes be updated in that manner?
Post by Martin Kletzander
+
+ ret = 0;
+ VIR_FREE(tasks);
+ VIR_FREE(pidstr);
+ return ret;
+}
+
+
+int
+virResctrlAllocRemove(virResctrlAllocPtr alloc)
+{
+ int ret = 0;
+
+ if (!alloc->path)
+ return 0;
+
+ VIR_DEBUG("Removing resctrl allocation %s", alloc->path);
+ if (rmdir(alloc->path) != 0 && errno != ENOENT) {
Is it only one level of directory or multiple? e.g. would
virFileDeleteTree be better? In fact overall it may be better.
I hope I found everything ;-)
John
Post by Martin Kletzander
+ ret = -errno;
+ VIR_ERROR(_("Unable to remove %s (%d)"), alloc->path, errno);
+ }
+
+ return ret;
+}
diff --git a/src/util/virresctrl.h b/src/util/virresctrl.h
index 471c02f28dcd..0fbd9dd3acfb 100644
--- a/src/util/virresctrl.h
+++ b/src/util/virresctrl.h
@@ -68,4 +68,66 @@ virResctrlInfoGetCache(virResctrlInfoPtr resctrl,
size_t *ncontrols,
virResctrlInfoPerCachePtr **controls);
+/* Alloc-related things */
+typedef struct _virResctrlAlloc virResctrlAlloc;
+typedef virResctrlAlloc *virResctrlAllocPtr;
+
+typedef int virResctrlAllocForeachSizeCallback(unsigned int level,
+ virCacheType type,
+ unsigned int cache,
+ unsigned long long size,
+ void *opaque);
+
+virResctrlAllocPtr
+virResctrlAllocNew(void);
+
+bool
+virResctrlAllocIsEmpty(virResctrlAllocPtr resctrl);
+
+int
+virResctrlAllocUpdateSize(virResctrlAllocPtr resctrl,
+ unsigned int level,
+ virCacheType type,
+ unsigned int cache,
+ unsigned long long size);
+
+int
+virResctrlAllocUpdateMask(virResctrlAllocPtr resctrl,
+ unsigned int level,
+ virCacheType type,
+ unsigned int cache,
+ virBitmapPtr mask);
+
+int
+virResctrlAllocSetSize(virResctrlAllocPtr resctrl,
+ unsigned int level,
+ virCacheType type,
+ unsigned int cache,
+ unsigned long long size);
+
+int
+virResctrlAllocForeachSize(virResctrlAllocPtr resctrl,
+ virResctrlAllocForeachSizeCallback cb,
+ void *opaque);
+
+int
+virResctrlAllocSetID(virResctrlAllocPtr alloc,
+ const char *id);
+
+char *
+virResctrlAllocFormat(virResctrlAllocPtr alloc);
+
+int
+virResctrlAllocCreate(virResctrlInfoPtr r_info,
+ virResctrlAllocPtr alloc,
+ const char *drivername,
+ const char *machinename);
+
+int
+virResctrlAllocAddPID(virResctrlAllocPtr alloc,
+ pid_t pid);
+
+int
+virResctrlAllocRemove(virResctrlAllocPtr alloc);
+
#endif /* __VIR_RESCTRL_H__ */
diff --git a/src/util/virresctrlpriv.h b/src/util/virresctrlpriv.h
new file mode 100644
index 000000000000..932ff7329c8c
--- /dev/null
+++ b/src/util/virresctrlpriv.h
@@ -0,0 +1,27 @@
+/*
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __VIR_RESCTRL_PRIV_H__
+# define __VIR_RESCTRL_PRIV_H__
+
+# include "virresctrl.h"
+
+virResctrlAllocPtr
+virResctrlAllocGetFree(virResctrlInfoPtr resctrl);
+
+#endif /* __VIR_RESCTRL_PRIV_H__ */
--
libvir-list mailing list
https://www.redhat.com/mailman/listinfo/libvir-list
Martin Kletzander
2018-01-22 15:44:33 UTC
Permalink
Post by Pavel Hrdina
Post by John Ferlan
Post by Martin Kletzander
With this commit we finally have a way to read and manipulate basic resctrl
settings. Locking is done only on exposed functions that read/write from/to
resctrlfs. Not in functions that are exposed in virresctrlpriv.h as those are
only supposed to be used from tests.
---
src/Makefile.am | 2 +-
src/libvirt_private.syms | 11 +
src/util/virresctrl.c | 1041 ++++++++++++++++++++++++++++++++++++++++++++-
src/util/virresctrl.h | 62 +++
src/util/virresctrlpriv.h | 27 ++
5 files changed, 1141 insertions(+), 2 deletions(-)
create mode 100644 src/util/virresctrlpriv.h
Geez, as referenced by the cover letter, I'm glad this is the non way
too complicated parts of the code (tongue firmly planted in my cheek
with the obligatory eye roll emoji).
So, IIUC virResctrlInfoPtr is the "host" world and virResctrlAllocPtr is
the "guest" world, right? If so might be something to add to the commit
messages to make things just slightly more clear.
Lots of code here - hopefully another set of eyes can look at this too -
I'm sure I'll miss something as it's been very difficult to review this
in one (long) session...
Note to self, no sense in grep-ing for "alloc" or "free" any more after
this code is pushed as they're liberally used. Kind of funny to see
"alloc_free" in the same line ;-)
Feel free to suggest different naming. My head is so full of this that
I couldn't think of anything more sensible.
Post by Pavel Hrdina
Post by John Ferlan
@[n]sizes. It seems to be used as the array index for both and it's
never really crystal clear over the relationship between masks and sizes.
There's a lot info that I could add in the comments. And I will for the
next version.
Post by Pavel Hrdina
Post by John Ferlan
Post by Martin Kletzander
diff --git a/src/Makefile.am b/src/Makefile.am
index 4c022d1e44b9..9b4fd0c1d597 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -167,7 +167,7 @@ UTIL_SOURCES = \
util/virprocess.c util/virprocess.h \
util/virqemu.c util/virqemu.h \
util/virrandom.h util/virrandom.c \
- util/virresctrl.h util/virresctrl.c \
+ util/virresctrl.h util/virresctrl.c util/virresctrlpriv.h \
util/virrotatingfile.h util/virrotatingfile.c \
util/virscsi.c util/virscsi.h \
util/virscsihost.c util/virscsihost.h \
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 1a4f304f5e1f..a8009d913669 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -2548,6 +2548,17 @@ virRandomInt;
# util/virresctrl.h
virCacheTypeFromString;
virCacheTypeToString;
+virResctrlAllocAddPID;
+virResctrlAllocCreate;
+virResctrlAllocForeachSize;
+virResctrlAllocFormat;
+virResctrlAllocGetFree;
+virResctrlAllocNew;
+virResctrlAllocRemove;
+virResctrlAllocSetID;
+virResctrlAllocSetSize;
+virResctrlAllocUpdateMask;
+virResctrlAllocUpdateSize;
virResctrlGetInfo;
virResctrlInfoGetCache;
virResctrlInfoNew;
diff --git a/src/util/virresctrl.c b/src/util/virresctrl.c
index a681322905be..32ab84b6f121 100644
--- a/src/util/virresctrl.c
+++ b/src/util/virresctrl.c
@@ -18,8 +18,12 @@
#include <config.h>
-#include "virresctrl.h"
+#include <sys/file.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include "virresctrlpriv.h"
#include "c-ctype.h"
#include "count-one-bits.h"
#include "viralloc.h"
@@ -151,6 +155,156 @@ virResctrlInfoNew(void)
}
+/* Alloc-related definitions and AllocClass-related functions */
+typedef struct _virResctrlAllocPerType virResctrlAllocPerType;
+typedef virResctrlAllocPerType *virResctrlAllocPerTypePtr;
+struct _virResctrlAllocPerType {
+ /* There could be bool saying whether this is set or not, but since everything
+ * in virResctrlAlloc (and most of libvirt) goes with pointer arrays we would
+ * have to have one more level of allocation anyway, so this stays faithful to
+ * the concept */
+ unsigned long long **sizes;
+ size_t nsizes;
exactly? Is it a page size or just a "max size" in ?bytes for something
stored in the cache?
It's an array that stores allocation size per CPU cache, since there
can be multiple CPUs in the host system.
Post by John Ferlan
Post by Martin Kletzander
+
+ /* Mask for each cache */
+ virBitmapPtr *masks;
+ size_t nmasks;
And of course, what does the mask represent?
The mask represents how the cache allocation is written into the
schemata file and it is based on the cache allocation size.
Let's say that you have L3 cache with size 15 MiB where granularity
of cache allocation is 768 KiB but minimal allocation is 1536 KiB.
This means that you have 20bit mask to configure the cache allocation.
So for example, if you would like to configure 3072 KiB of L3 cache
for a guest, the size would be stored in sizes. When the guest is
started, the following code tries to find a free allocation, that can
be used by the guest and if the allocation is possible it will store
the allocation mask for that specified CPU.
<cachetune vcpus='0-3'>
<cache id='0' level='3' type='both' size='3' unit='MiB'/>
</cachetune>
0x00F00
L3:0=00F00
Post by John Ferlan
Just a quick comment would suffice - for the future readers as well.
Post by Martin Kletzander
+};
+
+typedef struct _virResctrlAllocPerLevel virResctrlAllocPerLevel;
+typedef virResctrlAllocPerLevel *virResctrlAllocPerLevelPtr;
+struct _virResctrlAllocPerLevel {
+ virResctrlAllocPerTypePtr *types; /* Indexed with enum virCacheType */
+};
+
+struct _virResctrlAlloc {
+ virObject parent;
+
+ virResctrlAllocPerLevelPtr *levels;
+ size_t nlevels;
These represent the "L#" for the directory, right? Examples would be
L1both, L2code, l3data, right?
Just trying to provide enough information so that some future reader
will have a chance to understand the design at it's simplist level.
Post by Martin Kletzander
+
+ char *id; /* The identifier (any unique string for now) */
we could call this uniqueId too, IDC really though. Just having "id"
makes it a bit difficult to search on (cacheUID also works).
Post by Martin Kletzander
+ char *path;
This is our generated path to the guest cache allocation data.
Post by Martin Kletzander
+};
+
+static virClassPtr virResctrlAllocClass;
+
+static void
+virResctrlAllocDispose(void *obj)
+{
+ size_t i = 0;
+ size_t j = 0;
+ size_t k = 0;
+
+ virResctrlAllocPtr resctrl = obj;
+
+ for (i = 0; i < resctrl->nlevels; i++) {
+ virResctrlAllocPerLevelPtr level = resctrl->levels[i];
+
+ if (!level)
+ continue;
Looking at virResctrlAllocGetType, it's possible that ->levels is
allocated, but ->levels[level]->types is not, thus potentially causing
problems for the following loop.
I think a " || !level->types" above would do the trick.
Nice catch, however I would rather refactor the virResctrlAllocGetType
function to always allocate it correctly or cleanup after itself it
the allocation fails.
if (!resctrl->levels[level] &&
(VIR_ALLOC(resctrl->levels[level]) < 0 ||
VIR_ALLOC_N(resctrl->levels[level]->types, VIR_CACHE_TYPE_LAST) < 0))
return NULL;
if (!resctrl->levels[level]) {
virResctrlAllocPerTypePtr *types = NULL;
if (VIR_ALLOC_N(types, VIR_CACHE_TYPE_LAST) < 0)
return NULL;
if (VIR_ALLOC(resctrl->levels[level]) < 0) {
VIR_FREE(types);
return NULL;
}
resctrl->levels[level]->types = types;
}
Yeah, it used to be sparsely allocated, but when switching I forgot this
part. It should clean up after itself.
Post by Pavel Hrdina
Post by John Ferlan
Post by Martin Kletzander
+
+ for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
+ virResctrlAllocPerTypePtr type = level->types[j];
+
+ if (!type)
+ continue;
+
+ for (k = 0; k < type->nsizes; k++)
+ VIR_FREE(type->sizes[k]);
+
+ for (k = 0; k < type->nmasks; k++)
+ virBitmapFree(type->masks[k]);
+
+ VIR_FREE(type->sizes);
+ VIR_FREE(type->masks);
+ VIR_FREE(type);
+ }
+ VIR_FREE(level->types);
+ VIR_FREE(level);
+ }
+
+ VIR_FREE(resctrl->id);
+ VIR_FREE(resctrl->path);
+ VIR_FREE(resctrl->levels);
+}
+
+
+static int
+virResctrlAllocOnceInit(void)
+{
+ if (!(virResctrlAllocClass = virClassNew(virClassForObject(),
+ "virResctrlAlloc",
+ sizeof(virResctrlAlloc),
+ virResctrlAllocDispose)))
+ return -1;
+
+ return 0;
+}
+
+
+VIR_ONCE_GLOBAL_INIT(virResctrlAlloc)
+
+
+virResctrlAllocPtr
+virResctrlAllocNew(void)
+{
+ if (virResctrlAllocInitialize() < 0)
+ return NULL;
+
+ return virObjectNew(virResctrlAllocClass);
+}
+
+
+/* Common functions */
+static int
+virResctrlLockInternal(int op)
+{
+ int fd = open(SYSFS_RESCTRL_PATH, O_DIRECTORY | O_CLOEXEC);
I started reviewing bottom up and wondered later on - do we really want
to be writing into /sys/fs/resctrl? See virResctrlAllocCreate comments.
Well, we kind of need to write to /sys/fs/resctrl to configure the
cache allocation in the host for the guest that is about to start.
Post by John Ferlan
Post by Martin Kletzander
+
+ if (fd < 0) {
+ virReportSystemError(errno, "%s", _("Cannot open resctrlfs"));
+ return -1;
+ }
+
+ if (flock(fd, op) < 0) {
+ virReportSystemError(errno, "%s", _("Cannot lock resctrlfs"));
+ VIR_FORCE_CLOSE(fd);
+ return -1;
+ }
+
+ return fd;
+}
+
+
+static inline int
+virResctrlLockWrite(void)
+{
+ return virResctrlLockInternal(LOCK_EX);
+}
+
+
+static int
+virResctrlUnlock(int fd)
+{
+ if (fd == -1)
+ return 0;
+
+ /* The lock gets unlocked by closing the fd, which we need to do anyway in
+ * order to clean up properly */
+ if (VIR_CLOSE(fd) < 0) {
+ virReportSystemError(errno, "%s", _("Cannot close resctrlfs"));
+
+ /* Trying to save the already broken */
+ if (flock(fd, LOCK_UN) < 0)
+ virReportSystemError(errno, "%s", _("Cannot unlock resctrlfs"));
+ return -1;
+ }
+
+ return 0;
+}
+
+
/* Info-related functions */
bool
virResctrlInfoIsEmpty(virResctrlInfoPtr resctrl)
@@ -354,3 +508,888 @@ virResctrlInfoGetCache(virResctrlInfoPtr resctrl,
VIR_FREE(*controls);
goto cleanup;
}
+
+
+/* Alloc-related functions */
+bool
+virResctrlAllocIsEmpty(virResctrlAllocPtr resctrl)
+{
+ size_t i = 0;
+ size_t j = 0;
+ size_t k = 0;
+
+ if (!resctrl)
+ return true;
+
Rather than numerous loops - would it perhaps be better to have a single
boolean field that gets set whenever a "sizes" or "masks" entry is set?
Of course this would all need to be locked, right?
It wouldn't, no two threads can access the same allocation, it's under a VM.
Post by Pavel Hrdina
Post by John Ferlan
Post by Martin Kletzander
+ for (i = 0; i < resctrl->nlevels; i++) {
+ virResctrlAllocPerLevelPtr a_level = resctrl->levels[i];
+
+ if (!a_level)
+ continue;
+
+ for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
+ virResctrlAllocPerTypePtr a_type = a_level->types[j];
+
+ if (!a_type)
+ continue;
+
+ for (k = 0; k < a_type->nsizes; k++) {
+ if (a_type->sizes[k])
+ return false;
+ }
+
+ for (k = 0; k < a_type->nmasks; k++) {
+ if (a_type->masks[k])
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+
+static virResctrlAllocPerTypePtr
+virResctrlAllocGetType(virResctrlAllocPtr resctrl,
+ unsigned int level,
+ virCacheType type)
+{
+ virResctrlAllocPerLevelPtr a_level = NULL;
+
+ if (resctrl->nlevels <= level &&
+ VIR_EXPAND_N(resctrl->levels, resctrl->nlevels, level - resctrl->nlevels + 1) < 0)
+ return NULL;
+
+ if (!resctrl->levels[level] &&
+ (VIR_ALLOC(resctrl->levels[level]) < 0 ||
+ VIR_ALLOC_N(resctrl->levels[level]->types, VIR_CACHE_TYPE_LAST) < 0))
+ return NULL;
Could have a problem if ->levels[level] ALLOC succeeds, but
->levels[level]->types ALLOC_N fails vis-a-vis the assumption in Dispose.
Post by Martin Kletzander
+
+ a_level = resctrl->levels[level];
+
+ if (!a_level->types[type] && VIR_ALLOC(a_level->types[type]) < 0)
+ return NULL;
+
+ return a_level->types[type];
+}
+
+
+int
+virResctrlAllocUpdateMask(virResctrlAllocPtr resctrl,
+ unsigned int level,
+ virCacheType type,
+ unsigned int cache,
+ virBitmapPtr mask)
So far, only ever called from virresctrl.c, so can this be static? Or
are there future plans?
Post by Martin Kletzander
+{
+ virResctrlAllocPerTypePtr a_type = virResctrlAllocGetType(resctrl, level, type);
+
+ if (!a_type)
+ return -1;
+
+ if (a_type->nmasks <= cache &&
+ VIR_EXPAND_N(a_type->masks, a_type->nmasks,
+ cache - a_type->nmasks + 1) < 0)
+ return -1;
+
+ if (!a_type->masks[cache]) {
+ a_type->masks[cache] = virBitmapNew(virBitmapSize(mask));
+
+ if (!a_type->masks[cache])
+ return -1;
+ }
Since this is an Update function, is it possible that the incoming mask
is a different size than what was already existing? IOW: In an else
condition, do we need to compare the size of the source and destination
before the Copy? And if so, Free the current and allocate one using the
new size...
This shouldn't ever happen and if it does, something is seriously
wrong since the mask is stored in /sys/fs/resctrl and it should
already ensure that everything is correctly formatted.
Post by John Ferlan
Post by Martin Kletzander
+
+ return virBitmapCopy(a_type->masks[cache], mask);
In order to speed up IsEmpty we could set a boolean indicating the
change to resctrl
It wouldn't speed up anything really, instead we would have duplicated
information which would cause mess as it can lead to more errors.
Post by Pavel Hrdina
Post by John Ferlan
Post by Martin Kletzander
+}
+
+
+int
+virResctrlAllocUpdateSize(virResctrlAllocPtr resctrl,
+ unsigned int level,
+ virCacheType type,
+ unsigned int cache,
+ unsigned long long size)
Same here, only called from virresctrl.c
Post by Martin Kletzander
+{
+ virResctrlAllocPerTypePtr a_type = virResctrlAllocGetType(resctrl, level, type);
+
+ if (!a_type)
+ return -1;
+
+ if (a_type->nsizes <= cache &&
+ VIR_EXPAND_N(a_type->sizes, a_type->nsizes,
+ cache - a_type->nsizes + 1) < 0)
+ return -1;
+
+ if (!a_type->sizes[cache] && VIR_ALLOC(a_type->sizes[cache]) < 0)
+ return -1;
+
+ *(a_type->sizes[cache]) = size;
+
Same comment regarding IsEmpty and setting a boolean...
Post by Martin Kletzander
+ return 0;
+}
+
+
+static bool
+virResctrlAllocCheckCollision(virResctrlAllocPtr a,
+ unsigned int level,
+ virCacheType type,
+ unsigned int cache)
+{
+ virResctrlAllocPerLevelPtr a_level = NULL;
+ virResctrlAllocPerTypePtr a_type = NULL;
+
+ if (!a)
+ return false;
+
+ if (a->nlevels <= level)
+ return false;
+
+ a_level = a->levels[level];
+
+ if (!a_level)
+ return false;
+
+ /* All types should be always allocated */
+ a_type = a_level->types[VIR_CACHE_TYPE_BOTH];
+
+ /* If there is an allocation for type 'both', there can be no other
+ * allocation for the same cache */
+ if (a_type && a_type->nsizes > cache && a_type->sizes[cache])
+ return true;
+
+ if (type == VIR_CACHE_TYPE_BOTH) {
+ a_type = a_level->types[VIR_CACHE_TYPE_CODE];
+
+ if (a_type && a_type->nsizes > cache && a_type->sizes[cache])
+ return true;
+
+ a_type = a_level->types[VIR_CACHE_TYPE_DATA];
+
+ if (a_type && a_type->nsizes > cache && a_type->sizes[cache])
+ return true;
+ } else {
+ a_type = a_level->types[type];
+
+ if (a_type && a_type->nsizes > cache && a_type->sizes[cache])
+ return true;
+ }
This is simple yet confusing all in the same pile of code. I'm sure it
helps to have the overall design in mind though.
Would it even matter to check any of this if IsEmpty returns true?
Post by Martin Kletzander
+
+ return false;
+}
+
+
+int
+virResctrlAllocSetSize(virResctrlAllocPtr resctrl,
+ unsigned int level,
+ virCacheType type,
+ unsigned int cache,
+ unsigned long long size)
+{
+ if (virResctrlAllocCheckCollision(resctrl, level, type, cache)) {
+ virReportError(VIR_ERR_XML_ERROR,
+ _("Colliding cache allocations for cache "
+ "level '%u' id '%u', type '%s'"),
+ level, cache, virCacheTypeToString(type));
+ return -1;
+ }
+
+ return virResctrlAllocUpdateSize(resctrl, level, type, cache, size);
+}
+
+
+int
+virResctrlAllocForeachSize(virResctrlAllocPtr resctrl,
+ virResctrlAllocForeachSizeCallback cb,
+ void *opaque)
+{
+ int ret = 0;
+ unsigned int level = 0;
+ unsigned int type = 0;
+ unsigned int cache = 0;
+
+ if (!resctrl)
+ return 0;
Is it possible there are no sizes set?
Yes, then it returns zero without calling the callback.
Post by Pavel Hrdina
Post by John Ferlan
Post by Martin Kletzander
+
+ for (level = 0; level < resctrl->nlevels; level++) {
+ virResctrlAllocPerLevelPtr a_level = resctrl->levels[level];
+
+ if (!a_level)
+ continue;
+
+ for (type = 0; type < VIR_CACHE_TYPE_LAST; type++) {
+ virResctrlAllocPerTypePtr a_type = a_level->types[type];
+
+ if (!a_type)
+ continue;
+
+ for (cache = 0; cache < a_type->nsizes; cache++) {
+ unsigned long long *size = a_type->sizes[cache];
+
+ if (!size)
+ continue;
+
+ ret = cb(level, type, cache, *size, opaque);
+ if (ret < 0)
+ return ret;
+ }
+ }
+ }
+
+ return 0;
+}
+
+
+int
+virResctrlAllocSetID(virResctrlAllocPtr alloc,
+ const char *id)
+{
+ if (!id) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Resctrl allocation 'id' cannot be NULL"));
+ return -1;
+ }
+
+ return VIR_STRDUP(alloc->id, id);
+}
+
+
Just a few words here about the expected format you'll be formatting and
parsing would be helpful. Still not 100% clear how [n]sizes comes into
play. When you're saving/re-reading I guess I would have thought (for
some reason) that perhaps sizes would be important
Post by Martin Kletzander
+char *
+virResctrlAllocFormat(virResctrlAllocPtr resctrl)
So far only ever called from virresctrl.c
Post by Martin Kletzander
+{
+ virBuffer buf = VIR_BUFFER_INITIALIZER;
+ unsigned int level = 0;
+ unsigned int type = 0;
+ unsigned int cache = 0;
+
+ if (!resctrl)
+ return NULL;
+
+ for (level = 0; level < resctrl->nlevels; level++) {
+ virResctrlAllocPerLevelPtr a_level = resctrl->levels[level];
+
+ if (!a_level)
+ continue;
+
+ for (type = 0; type < VIR_CACHE_TYPE_LAST; type++) {
+ virResctrlAllocPerTypePtr a_type = a_level->types[type];
+
+ if (!a_type)
+ continue;
+
+ virBufferAsprintf(&buf, "L%u%s:", level, virResctrlTypeToString(type));
+
+ for (cache = 0; cache < a_type->nmasks; cache++) {
+ virBitmapPtr mask = a_type->masks[cache];
+ char *mask_str = NULL;
+
+ if (!mask)
+ continue;
+
+ mask_str = virBitmapToString(mask, false, true);
+ if (!mask_str) {
+ virBufferFreeAndReset(&buf);
+ return NULL;
+ }
+
+ virBufferAsprintf(&buf, "%u=%s;", cache, mask_str);
+ VIR_FREE(mask_str);
+ }
+
+ virBufferTrim(&buf, ";", 1);
+ virBufferAddChar(&buf, '\n');
+ }
+ }
+
+ virBufferCheckError(&buf);
+ return virBufferContentAndReset(&buf);
Joy - some day it'd be nice to get back to the code that was going to
combine the CheckError and ContentAndReset... In the meantime, once this
is pushed I'll have another Coverity whine to filter (since CheckError
doesn't check status like 217 out of 220 calls do).
Yeah, agree. Maybe write it as a GSoC idea? ;)
Post by Pavel Hrdina
Post by John Ferlan
Post by Martin Kletzander
+}
+
+
+static int
+virResctrlAllocParseProcessCache(virResctrlAllocPtr resctrl,
+ unsigned int level,
+ virCacheType type,
+ char *cache)
+{
+ char *tmp = strchr(cache, '=');
+ unsigned int cache_id = 0;
+ virBitmapPtr mask = NULL;
+ int ret = -1;
+
+ if (!tmp)
+ return 0;
+
+ *tmp = '\0';
+ tmp++;
+
+ if (virStrToLong_uip(cache, NULL, 10, &cache_id) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Invalid cache id '%s'"), cache);
+ return -1;
+ }
+
+ mask = virBitmapNewString(tmp);
+ if (!mask)
+ return -1;
+
+ if (virResctrlAllocUpdateMask(resctrl, level, type, cache_id, mask) < 0)
+ goto cleanup;
+
+ ret = 0;
+ virBitmapFree(mask);
+ return ret;
+}
+
+
+static int
+virResctrlAllocParseProcessLine(virResctrlAllocPtr resctrl,
+ char *line)
+{
+ char **caches = NULL;
+ char *tmp = NULL;
+ unsigned int level = 0;
+ int type = -1;
+ size_t ncaches = 0;
+ size_t i = 0;
+ int ret = -1;
+
+ /* Skip lines that don't concern caches, e.g. MB: etc. */
+ if (line[0] != 'L')
+ return 0;
+
+ /* And lines that we can't parse too */
+ tmp = strchr(line, ':');
+ if (!tmp)
+ return 0;
+
+ *tmp = '\0';
+ tmp++;
+
+ if (virStrToLong_uip(line + 1, &line, 10, &level) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Cannot parse resctrl schema level '%s'"),
+ line + 1);
+ return -1;
+ }
+
+ type = virResctrlTypeFromString(line);
+ if (type < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Cannot parse resctrl schema level '%s'"),
+ line + 1);
+ return -1;
+ }
+
+ caches = virStringSplitCount(tmp, ";", 0, &ncaches);
+ if (!caches)
+ return 0;
+
+ for (i = 0; i < ncaches; i++) {
+ if (virResctrlAllocParseProcessCache(resctrl, level, type, caches[i]) < 0)
+ goto cleanup;
+ }
+
+ ret = 0;
+ virStringListFree(caches);
+ return ret;
+}
+
+
+static int
+virResctrlAllocParse(virResctrlAllocPtr alloc,
+ const char *schemata)
+{
+ char **lines = NULL;
+ size_t nlines = 0;
+ size_t i = 0;
+ int ret = -1;
+
+ lines = virStringSplitCount(schemata, "\n", 0, &nlines);
+ for (i = 0; i < nlines; i++) {
+ if (virResctrlAllocParseProcessLine(alloc, lines[i]) < 0)
+ goto cleanup;
+ }
+
+ ret = 0;
+ virStringListFree(lines);
+ return ret;
+}
+
+
+static void
+virResctrlAllocSubtractPerType(virResctrlAllocPerTypePtr a,
+ virResctrlAllocPerTypePtr b)
+{
+ size_t i = 0;
+
+ if (!a || !b)
+ return;
+
+ for (i = 0; i < a->nmasks && i < b->nmasks; ++i) {
+ if (a->masks[i] && b->masks[i])
+ virBitmapSubtract(a->masks[i], b->masks[i]);
+ }
Does [n]sizes affect anything? IOW: does it matter.. probably not, but
my brain is overloaded right now!
Will add more info.
Post by Pavel Hrdina
Post by John Ferlan
Post by Martin Kletzander
+}
+
+
+static void
+virResctrlAllocSubtract(virResctrlAllocPtr a,
+ virResctrlAllocPtr b)
perhaps easier to "think about" if using dst and src... here and above.
Post by Martin Kletzander
+{
+ size_t i = 0;
+ size_t j = 0;
+
+ if (!b)
+ return;
+
+ for (i = 0; i < a->nlevels && b->nlevels; ++i) {
b->nlevels is not a boolean or pointer... should be "i < " too right?
Any special reason to use ++i over i++?
Post by Martin Kletzander
+ if (a->levels[i] && b->levels[i]) {
+ /* Here we rely on all the system allocations to use the same types.
+ * We kind of _hope_ it's the case. If this is left here until the
+ * review and someone finds it, then suggest only removing this last
+ * sentence. */
Your golden egg has been discovered and I agree, how do we know? and of
course what role does CheckCollision (eventually) play?
Post by Martin Kletzander
+ for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
+ virResctrlAllocSubtractPerType(a->levels[i]->types[j],
+ b->levels[i]->types[j]);
+ }
+ }
+ }
+}
+
+
+static virResctrlAllocPtr
+virResctrlAllocNewFromInfo(virResctrlInfoPtr info)
+{
+ size_t i = 0;
+ size_t j = 0;
+ size_t k = 0;
+ virResctrlAllocPtr ret = virResctrlAllocNew();
+ virBitmapPtr mask = NULL;
+
+ if (!ret)
+ return NULL;
+
+ for (i = 0; i < info->nlevels; i++) {
+ virResctrlInfoPerLevelPtr i_level = info->levels[i];
+
+ if (!i_level)
+ continue;
+
+ for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
+ virResctrlInfoPerTypePtr i_type = i_level->types[j];
+
+ if (!i_type)
+ continue;
+
+ virBitmapFree(mask);
+ mask = virBitmapNew(i_type->bits);
+ if (!mask)
+ goto error;
+ virBitmapSetAll(mask);
+
+ for (k = 0; k <= i_type->max_cache_id; k++) {
+ if (virResctrlAllocUpdateMask(ret, i, j, k, mask) < 0)
+ goto error;
+ }
+ }
+ }
+
+ virBitmapFree(mask);
+ return ret;
+ virObjectUnref(ret);
+ ret = NULL;
+ goto cleanup;
+}
+
+
This needs a function header - args, what does it do, etc.
Post by Martin Kletzander
+virResctrlAllocPtr
+virResctrlAllocGetFree(virResctrlInfoPtr resctrl)
So far only ever called from virresctrl.c and from a static function, so
is it really necessary to be external?
It's also really "odd" to have Free in the name and this isn't
necessarily a Free type function - far from it! Perhaps Unused is closer
to reality with the usage of the Subtract calls. Of course that could
affect variable names selected already...
Post by Martin Kletzander
+{
+ virResctrlAllocPtr ret = NULL;
+ virResctrlAllocPtr alloc = NULL;
+ virBitmapPtr mask = NULL;
@mask is unused in this context except for the virBitmapFree
Oh, good catch.
Post by Pavel Hrdina
Post by John Ferlan
Post by Martin Kletzander
+ struct dirent *ent = NULL;
+ DIR *dirp = NULL;
+ char *schemata = NULL;
+ int rv = -1;
+
+ if (virResctrlInfoIsEmpty(resctrl)) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+ _("Resource control is not supported on this host"));
+ return NULL;
+ }
+
+ ret = virResctrlAllocNewFromInfo(resctrl);
+ if (!ret)
+ return NULL;
+
+ if (virFileReadValueString(&schemata,
+ SYSFS_RESCTRL_PATH
+ "/schemata") < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Could not read schemata file for the default group"));
+ goto error;
+ }
Shall I assume that schemata is a CAT specific file? So is "default
group" the right wording? Just seems strange - consistently used
though... If changed you'd have multiple changes. I guess something is
my mind is wanting to associate it with a cgroup.
Yes it is. I believe I sent a link to the documentation few times, but
I always forgot to add the link to the commit message, I guess. I'll
make sure I double check I added it next time.
Post by Pavel Hrdina
Post by John Ferlan
Post by Martin Kletzander
+
+ alloc = virResctrlAllocNew();
+ if (!alloc)
+ goto error;
+
+ if (virResctrlAllocParse(alloc, schemata) < 0)
+ goto error;
+ if (virResctrlAllocIsEmpty(alloc)) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("No schemata for default resctrlfs group"));
+ goto error;
+ }
+ virResctrlAllocSubtract(ret, alloc);
+
+ if (virDirOpen(&dirp, SYSFS_RESCTRL_PATH) < 0)
+ goto error;
+
+ while ((rv = virDirRead(dirp, &ent, SYSFS_RESCTRL_PATH)) > 0) {
+ if (ent->d_type != DT_DIR)
+ continue;
+
+ if (STREQ(ent->d_name, "info"))
+ continue;
+
+ VIR_FREE(schemata);
+ if (virFileReadValueString(&schemata,
+ SYSFS_RESCTRL_PATH
+ "/%s/schemata",
+ ent->d_name) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Could not read schemata file for group %s"),
+ ent->d_name);
+ goto error;
+ }
+
+ virObjectUnref(alloc);
+ alloc = virResctrlAllocNew();
+ if (!alloc)
+ goto error;
+
+ if (virResctrlAllocParse(alloc, schemata) < 0)
+ goto error;
+
+ virResctrlAllocSubtract(ret, alloc);
+
+ VIR_FREE(schemata);
I think this one could be duplicitous
Post by Martin Kletzander
+ }
+ if (rv < 0)
+ goto error;
+
+ virObjectUnref(alloc);
+ VIR_DIR_CLOSE(dirp);
+ VIR_FREE(schemata);
+ virBitmapFree(mask);
+ return ret;
+
+ virObjectUnref(ret);
+ ret = NULL;
+ goto cleanup;
+}
+
+
Could use a short (haha) description for all the magic that happens here!
Post by Martin Kletzander
+static int
+virResctrlAllocMasksAssign(virResctrlInfoPtr r_info,
s/r_info/resctrl/ for consistency?
Post by Martin Kletzander
+ virResctrlAllocPtr alloc)
+{
+ int ret = -1;
+ unsigned int level = 0;
+ virResctrlAllocPtr alloc_free = NULL;
+
+ if (!r_info) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+ _("Resource control is not supported on this host"));
+ return -1;
+ }
Caller already checks this... and it's a static function...
Post by Martin Kletzander
+
+ alloc_free = virResctrlAllocGetFree(r_info);
+ if (!alloc_free)
+ return -1;
+
+ for (level = 0; level < alloc->nlevels; level++) {
+ virResctrlAllocPerLevelPtr a_level = alloc->levels[level];
+ virResctrlAllocPerLevelPtr f_level = NULL;
+ unsigned int type = 0;
+
+ if (!a_level)
+ continue;
+
+ if (level < alloc_free->nlevels)
+ f_level = alloc_free->levels[level];
+
+ if (!f_level) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Cache level %d does not support tuning"),
+ level);
+ goto cleanup;
+ }
+
+ for (type = 0; type < VIR_CACHE_TYPE_LAST; type++) {
+ virResctrlAllocPerTypePtr a_type = a_level->types[type];
+ virResctrlAllocPerTypePtr f_type = f_level->types[type];
+ unsigned int cache = 0;
+
+ if (!a_type)
+ continue;
+
+ if (!f_type) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Cache level %d does not support tuning for "
+ "scope type '%s'"),
+ level, virCacheTypeToString(type));
+ goto cleanup;
+ }
+
large wet piles of brain matter all over my keyboard for this following
loop. It is 100% not self documenting - I'm not sure WTF is going on.
I have to agree that following this code is extremely hard,
perhaps splitting it into several functions would help to review
this madness :).
I'll extract into functions. Even though I was told to open-code some
other functions in previous versions.
Post by Pavel Hrdina
Post by John Ferlan
Post by Martin Kletzander
+ for (cache = 0; cache < a_type->nsizes; cache++) {
+ unsigned long long *size = a_type->sizes[cache];
+ virBitmapPtr a_mask = NULL;
+ virBitmapPtr f_mask = NULL;
+ virResctrlInfoPerLevelPtr i_level = r_info->levels[level];
+ virResctrlInfoPerTypePtr i_type = i_level->types[type];
+ unsigned long long granularity;
+ unsigned long long need_bits;
+ size_t i = 0;
+ ssize_t pos = -1;
+ ssize_t last_bits = 0;
+ ssize_t last_pos = -1;
+
+ if (!size)
+ continue;
+
+ if (cache >= f_type->nmasks) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Cache with id %u does not exists for level %d"),
+ cache, level);
+ goto cleanup;
+ }
+
+ f_mask = f_type->masks[cache];
+ if (!f_mask) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Cache level %d id %u does not support tuning for "
+ "scope type '%s'"),
+ level, cache, virCacheTypeToString(type));
+ goto cleanup;
+ }
+
+ granularity = i_type->size / i_type->bits;
+ need_bits = *size / granularity;
+
+ if (*size % granularity) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Cache allocation of size %llu is not "
+ "divisible by granularity %llu"),
+ *size, granularity);
+ goto cleanup;
+ }
+
+ if (need_bits < i_type->min_cbm_bits) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Cache allocation of size %llu is smaller "
+ "than the minimum allowed allocation %llu"),
+ *size, granularity * i_type->min_cbm_bits);
+ goto cleanup;
+ }
+
+ while ((pos = virBitmapNextSetBit(f_mask, pos)) >= 0) {
+ ssize_t pos_clear = virBitmapNextClearBit(f_mask, pos);
+ ssize_t bits;
+
+ if (pos_clear < 0)
+ pos_clear = virBitmapSize(f_mask);
+
+ bits = pos_clear - pos;
+
+ /* Not enough bits, move on and skip all of them */
+ if (bits < need_bits) {
+ pos = pos_clear;
+ continue;
+ }
+
+ /* This fits perfectly */
+ if (bits == need_bits) {
+ last_pos = pos;
+ break;
+ }
+
+ /* Remember the smaller region if we already found on before */
+ if (last_pos < 0 || (last_bits && bits < last_bits)) {
+ last_bits = bits;
+ last_pos = pos;
+ }
+
+ pos = pos_clear;
+ }
+
+ if (last_pos < 0) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Not enough room for allocation of "
+ "%llu bytes for level %u cache %u "
+ "scope type '%s'"),
+ *size, level, cache,
+ virCacheTypeToString(type));
+ goto cleanup;
+ }
+
+ a_mask = virBitmapNew(i_type->bits);
+ for (i = last_pos; i < last_pos + need_bits; i++) {
+ ignore_value(virBitmapSetBit(a_mask, i));
+ ignore_value(virBitmapClearBit(f_mask, i));
+ }
+
+ if (a_type->nmasks <= cache) {
+ if (VIR_EXPAND_N(a_type->masks, a_type->nmasks,
+ cache - a_type->nmasks + 1) < 0) {
+ virBitmapFree(a_mask);
+ goto cleanup;
+ }
+ }
+ a_type->masks[cache] = a_mask;
+ }
+ }
+ }
+
+ ret = 0;
+ virObjectUnref(alloc_free);
+ return ret;
+}
+
+
+/* This checks if the directory for the alloc exists. If not it tries to create
+ * it and apply appropriate alloc settings. */
An overly simplified description for all that happens here.
Post by Martin Kletzander
+int
+virResctrlAllocCreate(virResctrlInfoPtr r_info,
s/r_info/resctrl/ for consistency?
Post by Martin Kletzander
+ virResctrlAllocPtr alloc,
+ const char *drivername,
+ const char *machinename)
+{
+ char *schemata_path = NULL;
+ char *alloc_str = NULL;
+ int ret = -1;
+ int lockfd = -1;
+
+ if (!alloc)
+ return 0;
+
+ if (!r_info) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+ _("Resource control is not supported on this host"));
+ return -1;
+ }
+
+ if (!alloc->path &&
+ virAsprintf(&alloc->path, "%s/%s-%s-%s",
+ SYSFS_RESCTRL_PATH, drivername, machinename, alloc->id) < 0)
+ return -1;
FWIW: From one of the .0 responses, this is where you get that
be superfluous, but it's not 100% clear what the options from the
Yeah, already fixed in the next version.
Post by Pavel Hrdina
Post by John Ferlan
In any case, if I'm reading things correctly it seems we would be saving
the guest data in the /sys/fs/resctrl path - is that the "right" place?
I can see for Info (e.g. host) level data that's read, sure, but for
guest (or Alloc) data that's written (and read) it's less clear in my
mind. Worried a bit about polluting /sys, some permissions issues
there, and a bit of future proofing - similar to cgroup layouts which
haven't remained constant through time. It would seem guests using this
would need to be sufficiently privileged or essentially not a session
guest...
Shouldn't things go in the guest libDir? IOW: vm->privateData->libDir
that could be passed from qemuProcessResctrlCreate and avoid replicating
the build-up logic here?
I would think avoiding /sys for guest data would probably be a good
idea. If the host dies how do we clean up after ourselves? At least if
we use guest directories, we can 'easily' document how to clean up.
As I've wrote, we need to write to /sys/fs/resctrl in order to apply
the cache allocation for a guest. This is not saving the "guest data"
anywhere, that's already done by having it configured in the guest XML.
<https://github.com/intel/intel-cmt-cat/wiki/resctrl>
Pavel
Post by John Ferlan
Post by Martin Kletzander
+
+ /* Check if this allocation was already created */
+ if (virFileIsDir(alloc->path))
+ return 0;
+
+ if (virFileExists(alloc->path)) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Path '%s' for resctrl allocation exists and is not a "
+ "directory"), alloc->path);
+ goto cleanup;
+ }
+
+ lockfd = virResctrlLockWrite();
+ if (lockfd < 0)
+ goto cleanup;
+
+ if (virResctrlAllocMasksAssign(r_info, alloc) < 0)
+ goto cleanup;
+
+ alloc_str = virResctrlAllocFormat(alloc);
+ if (!alloc_str)
+ goto cleanup;
+
There is a *lot* of magic going on here regarding how the above calls
will end up generating alloc_str and then how this schemata file gets
populated with data that I really think is not self documenting.
Post by Martin Kletzander
+ if (virAsprintf(&schemata_path, "%s/schemata", alloc->path) < 0)
+ goto cleanup;
+
+ if (virFileMakePath(alloc->path) < 0) {
+ virReportSystemError(errno,
+ _("Cannot create resctrl directory '%s'"),
+ alloc->path);
+ goto cleanup;
+ }
+
+ if (virFileWriteStr(schemata_path, alloc_str, 0) < 0) {
+ rmdir(alloc->path);
open coded virResctrlAllocRemove... could go with virFileDeleteTree too
a) not really, I need different error message here

b) could not, it's a special filesystem (as explained before)
Post by Pavel Hrdina
Post by John Ferlan
Post by Martin Kletzander
+ virReportSystemError(errno,
+ _("Cannot write into schemata file '%s'"),
+ schemata_path);
+ goto cleanup;
+ }
+
+ ret = 0;
+ virResctrlUnlock(lockfd);
+ VIR_FREE(alloc_str);
+ VIR_FREE(schemata_path);
+ return ret;
+}
+
+
Might be nice to describe what this is for... Why does this need to be
generated/saved?
Post by Martin Kletzander
+int
+virResctrlAllocAddPID(virResctrlAllocPtr alloc,
+ pid_t pid)
+{
+ char *tasks = NULL;
+ char *pidstr = NULL;
+ int ret = 0;
+
+ if (!alloc->path) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Cannot add pid to non-existing resctrl allocation"));
+ return -1;
+ }
+
Should we do any locking here?
No need to, we're not changing schemata.
Post by Pavel Hrdina
Post by John Ferlan
Post by Martin Kletzander
+ if (virAsprintf(&tasks, "%s/tasks", alloc->path) < 0)
+ return -1;
+
+ if (virAsprintf(&pidstr, "%lld", (long long int) pid) < 0)
+ goto cleanup;
+
+ if (virFileWriteStr(tasks, pidstr, 0) < 0) {
+ virReportSystemError(errno,
+ _("Cannot write pid in tasks file '%s'"),
+ tasks);
+ goto cleanup;
+ }
unplugged? Can the cachetunes be updated in that manner?
Hotplug and unplug needs to be fixed, yes.
Post by Pavel Hrdina
Post by John Ferlan
Post by Martin Kletzander
+
+ ret = 0;
+ VIR_FREE(tasks);
+ VIR_FREE(pidstr);
+ return ret;
+}
+
+
+int
+virResctrlAllocRemove(virResctrlAllocPtr alloc)
+{
+ int ret = 0;
+
+ if (!alloc->path)
+ return 0;
+
+ VIR_DEBUG("Removing resctrl allocation %s", alloc->path);
+ if (rmdir(alloc->path) != 0 && errno != ENOENT) {
Is it only one level of directory or multiple? e.g. would
virFileDeleteTree be better? In fact overall it may be better.
I hope I found everything ;-)
John
Post by Martin Kletzander
+ ret = -errno;
+ VIR_ERROR(_("Unable to remove %s (%d)"), alloc->path, errno);
+ }
+
+ return ret;
+}
diff --git a/src/util/virresctrl.h b/src/util/virresctrl.h
index 471c02f28dcd..0fbd9dd3acfb 100644
--- a/src/util/virresctrl.h
+++ b/src/util/virresctrl.h
@@ -68,4 +68,66 @@ virResctrlInfoGetCache(virResctrlInfoPtr resctrl,
size_t *ncontrols,
virResctrlInfoPerCachePtr **controls);
+/* Alloc-related things */
+typedef struct _virResctrlAlloc virResctrlAlloc;
+typedef virResctrlAlloc *virResctrlAllocPtr;
+
+typedef int virResctrlAllocForeachSizeCallback(unsigned int level,
+ virCacheType type,
+ unsigned int cache,
+ unsigned long long size,
+ void *opaque);
+
+virResctrlAllocPtr
+virResctrlAllocNew(void);
+
+bool
+virResctrlAllocIsEmpty(virResctrlAllocPtr resctrl);
+
+int
+virResctrlAllocUpdateSize(virResctrlAllocPtr resctrl,
+ unsigned int level,
+ virCacheType type,
+ unsigned int cache,
+ unsigned long long size);
+
+int
+virResctrlAllocUpdateMask(virResctrlAllocPtr resctrl,
+ unsigned int level,
+ virCacheType type,
+ unsigned int cache,
+ virBitmapPtr mask);
+
+int
+virResctrlAllocSetSize(virResctrlAllocPtr resctrl,
+ unsigned int level,
+ virCacheType type,
+ unsigned int cache,
+ unsigned long long size);
+
+int
+virResctrlAllocForeachSize(virResctrlAllocPtr resctrl,
+ virResctrlAllocForeachSizeCallback cb,
+ void *opaque);
+
+int
+virResctrlAllocSetID(virResctrlAllocPtr alloc,
+ const char *id);
+
+char *
+virResctrlAllocFormat(virResctrlAllocPtr alloc);
+
+int
+virResctrlAllocCreate(virResctrlInfoPtr r_info,
+ virResctrlAllocPtr alloc,
+ const char *drivername,
+ const char *machinename);
+
+int
+virResctrlAllocAddPID(virResctrlAllocPtr alloc,
+ pid_t pid);
+
+int
+virResctrlAllocRemove(virResctrlAllocPtr alloc);
+
#endif /* __VIR_RESCTRL_H__ */
diff --git a/src/util/virresctrlpriv.h b/src/util/virresctrlpriv.h
new file mode 100644
index 000000000000..932ff7329c8c
--- /dev/null
+++ b/src/util/virresctrlpriv.h
@@ -0,0 +1,27 @@
+/*
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __VIR_RESCTRL_PRIV_H__
+# define __VIR_RESCTRL_PRIV_H__
+
+# include "virresctrl.h"
+
+virResctrlAllocPtr
+virResctrlAllocGetFree(virResctrlInfoPtr resctrl);
+
+#endif /* __VIR_RESCTRL_PRIV_H__ */
--
libvir-list mailing list
https://www.redhat.com/mailman/listinfo/libvir-list
乔立勇(Eli Qiao)
2017-12-14 02:10:17 UTC
Permalink
hi Martin,

Thanks for reaching me, sure, I will try to testing your patch.

BTW, do you have a github repo/branch for these which I can directly pull?

Best regards
- Eli

a leaf duckweed belongs to the sea, where not to meet in life
Added stuff that Pavel wanted, removed some testcases that I still have in
my
repo, but it's way to complicated to add properly, so for now I would just
go
with this and manual testing of starting some domains. I got no hardware
to
test this on, currently, so some testing is needed before pushing this.
@Eli: Can you help with the testing?
Rename virResctrlInfo to virResctrlInfoPerCache
util: Add virResctrlInfo
conf: Use virResctrlInfo in capabilities
util: Remove now-unneeded resctrl functions
resctrl: Add functions to work with resctrl allocations
conf: Add support for cputune/cachetune
tests: Add virresctrltest
qemu: Add support for resctrl
docs: Add CAT (resctrl) support into news.xml
docs/formatdomain.html.in | 54 +
docs/news.xml | 9 +
docs/schemas/domaincommon.rng | 32 +
po/POTFILES.in | 1 +
src/Makefile.am | 2 +-
src/conf/capabilities.c | 55 +-
src/conf/capabilities.h | 4 +-
src/conf/domain_conf.c | 251 ++++
src/conf/domain_conf.h | 13 +
src/libvirt_private.syms | 16 +-
src/qemu/qemu_process.c | 61 +-
src/util/virresctrl.c | 1380
++++++++++++++++++--
src/util/virresctrl.h | 86 +-
src/util/virresctrlpriv.h | 27 +
tests/Makefile.am | 8 +-
tests/genericxml2xmlindata/cachetune-cdp.xml | 36 +
.../cachetune-colliding-allocs.xml | 30 +
.../cachetune-colliding-tunes.xml | 32 +
.../cachetune-colliding-types.xml | 30 +
tests/genericxml2xmlindata/cachetune-small.xml | 29 +
tests/genericxml2xmlindata/cachetune.xml | 33 +
tests/genericxml2xmltest.c | 10 +
tests/virresctrldata/resctrl-cdp.schemata | 2 +
.../virresctrldata/resctrl-skx-twocaches.schemata | 1 +
tests/virresctrldata/resctrl-skx.schemata | 1 +
tests/virresctrldata/resctrl.schemata | 1 +
tests/virresctrltest.c | 102 ++
27 files changed, 2174 insertions(+), 132 deletions(-)
create mode 100644 src/util/virresctrlpriv.h
create mode 100644 tests/genericxml2xmlindata/cachetune-cdp.xml
create mode 100644 tests/genericxml2xmlindata/
cachetune-colliding-allocs.xml
create mode 100644 tests/genericxml2xmlindata/
cachetune-colliding-tunes.xml
create mode 100644 tests/genericxml2xmlindata/
cachetune-colliding-types.xml
create mode 100644 tests/genericxml2xmlindata/cachetune-small.xml
create mode 100644 tests/genericxml2xmlindata/cachetune.xml
create mode 100644 tests/virresctrldata/resctrl-cdp.schemata
create mode 100644 tests/virresctrldata/resctrl-skx-twocaches.schemata
create mode 100644 tests/virresctrldata/resctrl-skx.schemata
create mode 100644 tests/virresctrldata/resctrl.schemata
create mode 100644 tests/virresctrltest.c
--
2.15.1
Eli
2017-12-14 07:54:35 UTC
Permalink
Post by 乔立勇(Eli Qiao)
hi Martin,
Thanks for reaching me, sure, I will try to testing your patch.
BTW, do you have a github repo/branch for these  which I can directly
pull?
Sorry, ignore this, I'v pulled your patch from thunderbird client, will
try to find some environments and time slot to do the testing.

If possible would you please share some guest domain xmls which can be
used as testing?
I'v got a 2-sockets xeon broadwell CPU by hand which can be used as testing.
Post by 乔立勇(Eli Qiao)
Best regards
- Eli
a leaf duckweed belongs to the sea, where not to meet in life
Added stuff that Pavel wanted, removed some testcases that I still
have in my
repo, but it's way to complicated to add properly, so for now I
would just go
with this and manual testing of starting some domains.  I got no
hardware to
test this on, currently, so some testing is needed before pushing this.
@Eli: Can you help with the testing?
  Rename virResctrlInfo to virResctrlInfoPerCache
  util: Add virResctrlInfo
  conf: Use virResctrlInfo in capabilities
  util: Remove now-unneeded resctrl functions
  resctrl: Add functions to work with resctrl allocations
  conf: Add support for cputune/cachetune
  tests: Add virresctrltest
  qemu: Add support for resctrl
  docs: Add CAT (resctrl) support into news.xml
 docs/formatdomain.html.in <http://formatdomain.html.in>          
              |   54 +
 docs/news.xml                                      |    9 +
 docs/schemas/domaincommon.rng                      |   32 +
 po/POTFILES.in                                     |    1 +
 src/Makefile.am                                    |    2 +-
 src/conf/capabilities.c                            |   55 +-
 src/conf/capabilities.h                            |    4 +-
 src/conf/domain_conf.c                             |  251 ++++
 src/conf/domain_conf.h                             |   13 +
 src/libvirt_private.syms                           |   16 +-
 src/qemu/qemu_process.c                            |   61 +-
 src/util/virresctrl.c                              | 1380
++++++++++++++++++--
 src/util/virresctrl.h                              |   86 +-
 src/util/virresctrlpriv.h                          |   27 +
 tests/Makefile.am                                  |    8 +-
 tests/genericxml2xmlindata/cachetune-cdp.xml       |  36 +
 .../cachetune-colliding-allocs.xml                 |  30 +
 .../cachetune-colliding-tunes.xml                  |  32 +
 .../cachetune-colliding-types.xml                  |  30 +
 tests/genericxml2xmlindata/cachetune-small.xml     |  29 +
 tests/genericxml2xmlindata/cachetune.xml           |  33 +
 tests/genericxml2xmltest.c                         |   10 +
 tests/virresctrldata/resctrl-cdp.schemata          |   2 +
 .../virresctrldata/resctrl-skx-twocaches.schemata  |   1 +
 tests/virresctrldata/resctrl-skx.schemata          |   1 +
 tests/virresctrldata/resctrl.schemata              |   1 +
 tests/virresctrltest.c                             |  102 ++
 27 files changed, 2174 insertions(+), 132 deletions(-)
 create mode 100644 src/util/virresctrlpriv.h
 create mode 100644 tests/genericxml2xmlindata/cachetune-cdp.xml
 create mode 100644
tests/genericxml2xmlindata/cachetune-colliding-allocs.xml
 create mode 100644
tests/genericxml2xmlindata/cachetune-colliding-tunes.xml
 create mode 100644
tests/genericxml2xmlindata/cachetune-colliding-types.xml
 create mode 100644 tests/genericxml2xmlindata/cachetune-small.xml
 create mode 100644 tests/genericxml2xmlindata/cachetune.xml
 create mode 100644 tests/virresctrldata/resctrl-cdp.schemata
 create mode 100644
tests/virresctrldata/resctrl-skx-twocaches.schemata
 create mode 100644 tests/virresctrldata/resctrl-skx.schemata
 create mode 100644 tests/virresctrldata/resctrl.schemata
 create mode 100644 tests/virresctrltest.c
--
2.15.1
Michal Privoznik
2017-12-14 09:06:52 UTC
Permalink
Post by Eli
Post by 乔立勇(Eli Qiao)
hi Martin,
Thanks for reaching me, sure, I will try to testing your patch.
BTW, do you have a github repo/branch for these  which I can directly
pull?
Sorry, ignore this, I'v pulled your patch from thunderbird client, will
try to find some environments and time slot to do the testing.
If possible would you please share some guest domain xmls which can be
used as testing?
There are some in patch 6/9.

Michal
Eli
2017-12-14 11:46:27 UTC
Permalink
@Eli: Can you help with the testing?
It seems the interface is only implement the isolated case, I remember
that you have proposed that for some overlap case?

I have not see the whole patch set yet, but I have some quick testing on
you patch, will try to find more time to review patches (Currently I am
maintain another daemon software which is dedicated for RDT feature
called RMD)

Only the issue 1 is the true issue, for the others, I think they should
be discussed, or be treat as the 'known issue'.

My env:

L1d cache:             32K
L1i cache:             32K
L2 cache:              256K
L3 cache:              56320K
NUMA node0 CPU(s):     0-21,44-65
NUMA node1 CPU(s):     22-43,66-87


virsh capabilities:

171 <cache>
172       <bank id='0' level='3' type='both' size='55' unit='MiB'
cpus='0-21,44-65'>
173         <control granularity='2816' unit='KiB' type='both'
maxAllocs='16'/>
174 </bank>
175       <bank id='1' level='3' type='both' size='55' unit='MiB'
cpus='22-43,66-87'>
176         <control granularity='2816' unit='KiB' type='both'
maxAllocs='16'/>
177 </bank>
178     </cache>

*Issue:

*1. Doesn't support asynchronous cache allocation. e.g, I need provide
all cache allocation require ways, but I am only care about the
allocation on one of the cache id, cause the VM won't be schedule to
another cache (socket).

So I got this error if I define the domain like this:

  <vcpu placement='static'>6</vcpu>
  <cputune>
    <emulatorpin cpuset='0,37-38,44,81-82'/>
    <cachetune vcpus='0-4'>
*      <cache id='0' level='3' type='both' size='2816' unit='KiB'/>
       ^^^ not provide cache id='1'
*    </cachetune>


***@s2600wt:~# virsh start kvm-cat
error: Failed to start domain kvm-cat
error: Cannot write into schemata file
'/sys/fs/resctrl/qemu-qemu-13-kvm-cat-0-4/schemata': Invalid argument

This behavior is not correct.

I expect the CBM will be look like:

***@s2600wt:/sys/fs/resctrl# cat qemu-qemu-14-kvm-cat-0-4/*
000000,00000000,00000000
L3:0=80;1=fffff *(no matter what it is, cause my VM won't be schedule on
it, ether I have deinfe the vcpu->cpu pining or, I assume that kernel
won't schedule it to cache 1)

*Or at least, restrict xml when I define this domain, tell me I need to
provide all cache ids (even if I have 4 cache but I only run my VM on
'cache 0')
*
*2. cache way fragment (no good answers)

I see that for now we allocate cache ways start from the low bits, newly
created VM will allocate cache from the next way, if some of the VM
(allocated ways in the middle, eg it's schemata is 00100) destroyed, and
that slot (1 cache way) may not fit others and it will be wasted, But,
how can we handle this, seems no good way, rearrangement? That will lead
cache missing in a time window I think.

3. The admin/user should manually operate the default resource group,
that's is to say, after resctrl is mounted, the admin/user should
manually change the schemata of default group. Will libvirt provide
interface/API to handle it?

4. Will provide some APIs like `FreeCacheWay` to end user to see how
many cache ways could be allocated on the host?

    For other users/orchestrator (nova), they may need to know if a VM
can schedule on the host, but the cache ways is not liner, it may have
fragment.

5, What if other application want to have some shared cache ways with
some of the VM?
    Libvirt for now try to read all of the resource group (instead of
maintain the consumed cache ways itself), so if another resource group
was created under /sys/fs/resctl, and the schemata of it is "FFFFF",
then libvirt will report not enough room for new VM. But the user
actually want to have another Appliation(e.g. ovs, dpdk pmds) share
cache ways with the VM created by libvirt.
**
I will try to more cases..

Thanks Eli.
  Rename virResctrlInfo to virResctrlInfoPerCache
  util: Add virResctrlInfo
  conf: Use virResctrlInfo in capabilities
  util: Remove now-unneeded resctrl functions
  resctrl: Add functions to work with resctrl allocations
  conf: Add support for cputune/cachetune
  tests: Add virresctrltest
  qemu: Add support for resctrl
  docs: Add CAT (resctrl) support into news.xml
 docs/formatdomain.html.in <http://formatdomain.html.in>        
                |   54 +
 docs/news.xml                                      |    9 +
 docs/schemas/domaincommon.rng                      |   32 +
 po/POTFILES.in                                     |    1 +
 src/Makefile.am                                    |    2 +-
 src/conf/capabilities.c                            |   55 +-
 src/conf/capabilities.h                            |    4 +-
 src/conf/domain_conf.c                             |  251 ++++
 src/conf/domain_conf.h                             |   13 +
 src/libvirt_private.syms                           |   16 +-
 src/qemu/qemu_process.c                            |   61 +-
 src/util/virresctrl.c                              | 1380
++++++++++++++++++--
 src/util/virresctrl.h                              |   86 +-
 src/util/virresctrlpriv.h                          |   27 +
 tests/Makefile.am                                  |    8 +-
 tests/genericxml2xmlindata/cachetune-cdp.xml  |   36 +
 .../cachetune-colliding-allocs.xml  |   30 +
 .../cachetune-colliding-tunes.xml |   32 +
 .../cachetune-colliding-types.xml |   30 +
 tests/genericxml2xmlindata/cachetune-small.xml  |   29 +
 tests/genericxml2xmlindata/cachetune.xml  |   33 +
 tests/genericxml2xmltest.c                         |   10 +
 tests/virresctrldata/resctrl-cdp.schemata |    2 +
 .../virresctrldata/resctrl-skx-twocaches.schemata |    1 +
 tests/virresctrldata/resctrl-skx.schemata |    1 +
 tests/virresctrldata/resctrl.schemata |    1 +
 tests/virresctrltest.c                             |  102 ++
 27 files changed, 2174 insertions(+), 132 deletions(-)
 create mode 100644 src/util/virresctrlpriv.h
 create mode 100644 tests/genericxml2xmlindata/cachetune-cdp.xml
 create mode 100644
tests/genericxml2xmlindata/cachetune-colliding-allocs.xml
 create mode 100644
tests/genericxml2xmlindata/cachetune-colliding-tunes.xml
 create mode 100644
tests/genericxml2xmlindata/cachetune-colliding-types.xml
 create mode 100644 tests/genericxml2xmlindata/cachetune-small.xml
 create mode 100644 tests/genericxml2xmlindata/cachetune.xml
 create mode 100644 tests/virresctrldata/resctrl-cdp.schemata
 create mode 100644
tests/virresctrldata/resctrl-skx-twocaches.schemata
 create mode 100644 tests/virresctrldata/resctrl-skx.schemata
 create mode 100644 tests/virresctrldata/resctrl.schemata
 create mode 100644 tests/virresctrltest.c
--
2.15.1
Eli
2017-12-14 11:54:57 UTC
Permalink
Post by Eli
I will try to more cases..
Sorry, I forget to limit line word as 80.

Missing one issue I see:

If the host enabled CDP, which is to see the host will report l3 cache type
code and data. when user don't want code/data cache ways allocated
separated, for current implement, it will report not support `both` type
l3 cache.

But we can improve this as make code and data schemata the same
e.g, if host enabled CDP, but user request 2 `both` type l3 cache.

We can write the schemata looks like:

L3DATA:0=3
L3CODE:0=3
Post by Eli
Thanks Eli.
Post by 乔立勇(Eli Qiao)
  Rename virResctrlInfo to virResctrlInfoPerCache
  util: Add virResctrlInfo
  conf: Use virResctrlInfo in capabilities
  util: Remove now-unneeded resctrl functions
  resctrl: Add functions to work with resctrl allocations
  conf: Add support for cputune/cachetune
  tests: Add virresctrltest
  qemu: Add support for resctrl
  docs: Add CAT (resctrl) support into news.xml
 docs/formatdomain.html.in <http://formatdomain.html.in>        
          |   54 +
 docs/news.xml                                      | 9 +
 docs/schemas/domaincommon.rng                      |  32 +
 po/POTFILES.in                                     | 1 +
 src/Makefile.am                                    | 2 +-
 src/conf/capabilities.c                            |  55 +-
 src/conf/capabilities.h                            | 4 +-
 src/conf/domain_conf.c                             | 251 ++++
 src/conf/domain_conf.h                             |  13 +
 src/libvirt_private.syms                           |  16 +-
 src/qemu/qemu_process.c                            |  61 +-
 src/util/virresctrl.c                              | 1380
++++++++++++++++++--
 src/util/virresctrl.h                              |  86 +-
 src/util/virresctrlpriv.h                          |  27 +
 tests/Makefile.am                                  | 8 +-
 tests/genericxml2xmlindata/cachetune-cdp.xml  |   36 +
 .../cachetune-colliding-allocs.xml  |   30 +
 .../cachetune-colliding-tunes.xml |   32 +
 .../cachetune-colliding-types.xml |   30 +
 tests/genericxml2xmlindata/cachetune-small.xml  |   29 +
 tests/genericxml2xmlindata/cachetune.xml  |   33 +
 tests/genericxml2xmltest.c                         |  10 +
 tests/virresctrldata/resctrl-cdp.schemata |    2 +
 .../virresctrldata/resctrl-skx-twocaches.schemata |    1 +
 tests/virresctrldata/resctrl-skx.schemata |    1 +
 tests/virresctrldata/resctrl.schemata |    1 +
 tests/virresctrltest.c                             | 102 ++
 27 files changed, 2174 insertions(+), 132 deletions(-)
 create mode 100644 src/util/virresctrlpriv.h
 create mode 100644 tests/genericxml2xmlindata/cachetune-cdp.xml
 create mode 100644
tests/genericxml2xmlindata/cachetune-colliding-allocs.xml
 create mode 100644
tests/genericxml2xmlindata/cachetune-colliding-tunes.xml
 create mode 100644
tests/genericxml2xmlindata/cachetune-colliding-types.xml
 create mode 100644 tests/genericxml2xmlindata/cachetune-small.xml
 create mode 100644 tests/genericxml2xmlindata/cachetune.xml
 create mode 100644 tests/virresctrldata/resctrl-cdp.schemata
 create mode 100644
tests/virresctrldata/resctrl-skx-twocaches.schemata
 create mode 100644 tests/virresctrldata/resctrl-skx.schemata
 create mode 100644 tests/virresctrldata/resctrl.schemata
 create mode 100644 tests/virresctrltest.c
--
2.15.1
Martin Kletzander
2017-12-15 09:06:05 UTC
Permalink
Post by Eli
@Eli: Can you help with the testing?
It seems the interface is only implement the isolated case, I remember
that you have proposed that for some overlap case?
Hi, yes. It got a bit more complicated so I want to do this
incrementally. First enable the easiest cases, then add APIs to manage
the system's default group, make type='both' allocation work on
CDP-enabled hosts, add APIs for modifying cachetunes for live and
stopped domains, add support for memory bandwidth allocation, and so
on. This is too much stuff to add in one go.

I guess I forgot to add this info to the cover letter (I think I did at
least for the previous version).

I also wasted some time on the tests, some of them are not even in the
patchset, have a look at previous version if you want to see them.
Post by Eli
I have not see the whole patch set yet, but I have some quick testing on
you patch, will try to find more time to review patches (Currently I am
maintain another daemon software which is dedicated for RDT feature
called RMD)
Only the issue 1 is the true issue, for the others, I think they should
be discussed, or be treat as the 'known issue'.
L1d cache:             32K
L1i cache:             32K
L2 cache:              256K
L3 cache:              56320K
NUMA node0 CPU(s):     0-21,44-65
NUMA node1 CPU(s):     22-43,66-87
171 <cache>
172       <bank id='0' level='3' type='both' size='55' unit='MiB'
cpus='0-21,44-65'>
173         <control granularity='2816' unit='KiB' type='both'
maxAllocs='16'/>
174 </bank>
175       <bank id='1' level='3' type='both' size='55' unit='MiB'
cpus='22-43,66-87'>
176         <control granularity='2816' unit='KiB' type='both'
maxAllocs='16'/>
177 </bank>
178     </cache>
*1. Doesn't support asynchronous cache allocation. e.g, I need provide
all cache allocation require ways, but I am only care about the
allocation on one of the cache id, cause the VM won't be schedule to
another cache (socket).
Oh, really? This is not written in the kernel documentation. Can't the
unspecified caches just inherit the setting from the default group?
That would make sense. It would also automatically adjust if the
default system one is changed.

Do you have any contact to anyone working on the RDT in the kernel? I
think this would save time and effort to anyone who will be using the
feature.
Post by Eli
  <vcpu placement='static'>6</vcpu>
  <cputune>
    <emulatorpin cpuset='0,37-38,44,81-82'/>
    <cachetune vcpus='0-4'>
*      <cache id='0' level='3' type='both' size='2816' unit='KiB'/>
       ^^^ not provide cache id='1'
*    </cachetune>
error: Failed to start domain kvm-cat
error: Cannot write into schemata file
'/sys/fs/resctrl/qemu-qemu-13-kvm-cat-0-4/schemata': Invalid argument
Oh, I have to figure out why is there 'qemu-qemu' :D
Post by Eli
This behavior is not correct.
000000,00000000,00000000
L3:0=80;1=fffff *(no matter what it is, cause my VM won't be schedule on
it, ether I have deinfe the vcpu->cpu pining or, I assume that kernel
won't schedule it to cache 1)
Well, it matters. It would have to have all zeros there so that that
part of the cache is not occupied.
Post by Eli
*Or at least, restrict xml when I define this domain, tell me I need to
provide all cache ids (even if I have 4 cache but I only run my VM on
'cache 0')
*
We could do that. It would allow us to make this better (or lift the
restriction) in case this is "fixed" in the kernel.

Or at least in the future we could do this to meet the users half-way:

- Any vcpus that have cachetune enabled for them must also be pinned

- Users need to specify allocations for all cache ids that the vcpu
might run on (according to the pinning acquired from before), for all
others we'd just simply set it to all zeros or the same bitmask as the
system's default group.

But for now we could just copy the system's setting to unspecified
caches or request the user to specify everything.
Post by Eli
*2. cache way fragment (no good answers)
I see that for now we allocate cache ways start from the low bits, newly
created VM will allocate cache from the next way, if some of the VM
(allocated ways in the middle, eg it's schemata is 00100) destroyed, and
that slot (1 cache way) may not fit others and it will be wasted, But,
how can we handle this, seems no good way, rearrangement? That will lead
cache missing in a time window I think.
Avoiding fragmentation is not a simple thing. It's impossible to do
without any moving, which might be unwanted. This will be solved by
providing an API that will tell you move the allocation if you so
desire. For now I at least try allocating the smallest region into
which the requested allocation fits, so that the unallocated parts are
as big as possible.
Post by Eli
3. The admin/user should manually operate the default resource group,
that's is to say, after resctrl is mounted, the admin/user should
manually change the schemata of default group. Will libvirt provide
interface/API to handle it?
Yes, this is planned.
Post by Eli
4. Will provide some APIs like `FreeCacheWay` to end user to see how
many cache ways could be allocated on the host?
Yes, this should be provided by an API as well.
Post by Eli
    For other users/orchestrator (nova), they may need to know if a VM
can schedule on the host, but the cache ways is not liner, it may have
fragment.
5, What if other application want to have some shared cache ways with
some of the VM?
    Libvirt for now try to read all of the resource group (instead of
maintain the consumed cache ways itself), so if another resource group
was created under /sys/fs/resctl, and the schemata of it is "FFFFF",
then libvirt will report not enough room for new VM. But the user
actually want to have another Appliation(e.g. ovs, dpdk pmds) share
cache ways with the VM created by libvirt.
Adding support for shared allocations is planned as I said before,
however this is something that will be needed to be taken care of
differently anyway. I don't know how specific the use case would be,
but let's say you want to have 8 cache ways allocated for the VM, but
share only 4 of them with some DPDK PMD. You can't use "shared" because
that would just take some 8 bits even when some of them might be shared
with the system's default group. Moreover it means that the allocation
can be shared with machines ran in the future. So in this case you need
to have the 8 bits exclusively allocated and then (only after the
machine is started) pin the PMD process to those 4 cache ways.
Post by Eli
If the host enabled CDP, which is to see the host will report l3 cache type
code and data. when user don't want code/data cache ways allocated
separated, for current implement, it will report not support `both` type
l3 cache.
But we can improve this as make code and data schemata the same
e.g, if host enabled CDP, but user request 2 `both` type l3 cache.
L3DATA:0=3
L3CODE:0=3
Yes, that's what we want to achieve, but again, in a future patchset.

Hope that answers your questions. Thanks for trying it out, it is
really complicated to develop something like this without the actual
hardware to test it on.

Have a nice day,
Martin
Eli
2017-12-15 15:11:48 UTC
Permalink
Post by Eli
It seems the interface is only implement the isolated case, I remember
that you have proposed that for some overlap case?
Hi, yes.  It got a bit more complicated so I want to do this
incrementally.  First enable the easiest cases, then add APIs to manage
the system's default group, make type='both' allocation work on
CDP-enabled hosts, add APIs for modifying cachetunes for live and
stopped domains, add support for memory bandwidth allocation, and so
on.  This is too much stuff to add in one go.
I guess I forgot to add this info to the cover letter (I think I did at
least for the previous version).
I also wasted some time on the tests, some of them are not even in the
patchset, have a look at previous version if you want to see them.
ok, sorry for not watching for libvirt list for some time..
Post by Eli
I have not see the whole patch set yet, but I have some quick testing on
you patch, will try to find more time to review patches (Currently I am
maintain another daemon software which is dedicated for RDT feature
called RMD)
Only the issue 1 is the true issue, for the others, I think they should
be discussed, or be treat as the 'known issue'.
L1d cache:             32K
L1i cache:             32K
L2 cache:              256K
L3 cache:              56320K
NUMA node0 CPU(s):     0-21,44-65
NUMA node1 CPU(s):     22-43,66-87
171 <cache>
172       <bank id='0' level='3' type='both' size='55' unit='MiB'
cpus='0-21,44-65'>
173         <control granularity='2816' unit='KiB' type='both'
maxAllocs='16'/>
174 </bank>
175       <bank id='1' level='3' type='both' size='55' unit='MiB'
cpus='22-43,66-87'>
176         <control granularity='2816' unit='KiB' type='both'
maxAllocs='16'/>
177 </bank>
178     </cache>
*1. Doesn't support asynchronous cache allocation. e.g, I need provide
all cache allocation require ways, but I am only care about the
allocation on one of the cache id, cause the VM won't be schedule to
another cache (socket).
Oh, really?  This is not written in the kernel documentation. Can't the
unspecified caches just inherit the setting from the default group?
That would make sense.  It would also automatically adjust if the
default system one is changed.
Maybe I express myself not clearly, yes the caches will be added to
default resource group
Do you have any contact to anyone working on the RDT in the kernel? I
think this would save time and effort to anyone who will be using the
feature.
Sure, /***@intel.com and Tony Luck <***@intel.com>

kernel doc
https://github.com/torvalds/linux/blob/master/Documentation/x86/intel_rdt_ui.txt

///////
Post by Eli
  <vcpu placement='static'>6</vcpu>
  <cputune>
    <emulatorpin cpuset='0,37-38,44,81-82'/>
    <cachetune vcpus='0-4'>
*      <cache id='0' level='3' type='both' size='2816' unit='KiB'/>
       ^^^ not provide cache id='1'
*    </cachetune>
error: Failed to start domain kvm-cat
error: Cannot write into schemata file
'/sys/fs/resctrl/qemu-qemu-13-kvm-cat-0-4/schemata': Invalid argument
Oh, I have to figure out why is there 'qemu-qemu' :D
Post by Eli
This behavior is not correct.
000000,00000000,00000000
L3:0=80;1=fffff *(no matter what it is, cause my VM won't be schedule on
it, ether I have deinfe the vcpu->cpu pining or, I assume that kernel
won't schedule it to cache 1)
Well, it matters.  It would have to have all zeros there so that that
part of the cache is not occupied.
Well, the hardware won't allow you to specify 0 ways , at least 1 (some
of the platform it's 2 ways)
From my previous experence, I set it to fffff (it will be treat as 0 in
the code)

it's decided by min_cbm_bits

see
https://github.com/torvalds/linux/blob/master/Documentation/x86/intel_rdt_ui.txt#L48:14
Post by Eli
*Or at least, restrict xml when I define this domain, tell me I need to
provide all cache ids (even if I have 4 cache but I only run my VM on
'cache 0')
*
We could do that.  It would allow us to make this better (or lift the
restriction) in case this is "fixed" in the kernel.
- Any vcpus that have cachetune enabled for them must also be pinned
- Users need to specify allocations for all cache ids that the vcpu
 might run on (according to the pinning acquired from before), for all
 others we'd just simply set it to all zeros or the same bitmask as the
 system's default group.
But for now we could just copy the system's setting to unspecified
caches or request the user to specify everything.
Post by Eli
*2. cache way fragment (no good answers)
I see that for now we allocate cache ways start from the low bits, newly
created VM will allocate cache from the next way, if some of the VM
(allocated ways in the middle, eg it's schemata is 00100) destroyed, and
that slot (1 cache way) may not fit others and it will be wasted, But,
how can we handle this, seems no good way, rearrangement? That will lead
cache missing in a time window I think.
Avoiding fragmentation is not a simple thing.  It's impossible to do
without any moving, which might be unwanted.  This will be solved by
providing an API that will tell you move the allocation if you so
desire.  For now I at least try allocating the smallest region into
which the requested allocation fits, so that the unallocated parts are
as big as possible.
Agree
Post by Eli
3. The admin/user should manually operate the default resource group,
that's is to say, after resctrl is mounted, the admin/user should
manually change the schemata of default group. Will libvirt provide
interface/API to handle it?
Yes, this is planned.
Post by Eli
4. Will provide some APIs like `FreeCacheWay` to end user to see how
many cache ways could be allocated on the host?
Yes, this should be provided by an API as well.
Post by Eli
    For other users/orchestrator (nova), they may need to know if a VM
can schedule on the host, but the cache ways is not liner, it may have
fragment.
5, What if other application want to have some shared cache ways with
some of the VM?
    Libvirt for now try to read all of the resource group (instead of
maintain the consumed cache ways itself), so if another resource group
was created under /sys/fs/resctl, and the schemata of it is "FFFFF",
then libvirt will report not enough room for new VM. But the user
actually want to have another Appliation(e.g. ovs, dpdk pmds) share
cache ways with the VM created by libvirt.
Adding support for shared allocations is planned as I said before,
however this is something that will be needed to be taken care of
differently anyway.  I don't know how specific the use case would be,
but let's say you want to have 8 cache ways allocated for the VM, but
share only 4 of them with some DPDK PMD.  You can't use "shared" because
that would just take some 8 bits even when some of them might be shared
with the system's default group.  Moreover it means that the allocation
can be shared with machines ran in the future.  So in this case you need
to have the 8 bits exclusively allocated and then (only after the
machine is started) pin the PMD process to those 4 cache ways.
Post by Eli
If the host enabled CDP, which is to see the host will report l3 cache type
code and data. when user don't want code/data cache ways allocated
separated, for current implement, it will report not support `both` type
l3 cache.
But we can improve this as make code and data schemata the same
e.g, if host enabled CDP, but user request 2 `both` type l3 cache.
L3DATA:0=3
L3CODE:0=3
Yes, that's what we want to achieve, but again, in a future patchset.
Hope that answers your questions.  Thanks for trying it out, it is
really complicated to develop something like this without the actual
hardware to test it on.
Yep.
Have a nice day,
Martin
John Ferlan
2018-01-02 19:35:00 UTC
Permalink
Post by Martin Kletzander
Just to ease the review of following patches.
---
src/conf/capabilities.c | 2 +-
src/conf/capabilities.h | 2 +-
src/util/virresctrl.c | 4 ++--
src/util/virresctrl.h | 8 ++++----
4 files changed, 8 insertions(+), 8 deletions(-)
Reviewed-by: John Ferlan <***@redhat.com>

John
Loading...