[PATCH 05/17] xen: Port Xen hypervizor related code from mini-os

Wed Jul 1 19:46:22 CEST 2020

Title: s/hypervizor/hypervisor/

On 01/07/2020 17:29, Anastasiia Lukianenko wrote:
> From: Oleksandr Andrushchenko <oleksandr_andrushchenko at epam.com>
> 
> Port hypervizor related code from mini-os. Update essential

Ditto.

But I would be quite cautious to import code from mini-OS in order to 
support Arm. The port has always been broken and from a look below needs 
to be refined for Arm.

> arch code to support required bit operations, memory barriers etc.
> 
> Copyright for the bits ported belong to at least the following authors,
> please see related files for details:
> 
> Copyright (c) 2002-2003, K A Fraser
> Copyright (c) 2005, Grzegorz Milos, gm281 at cam.ac.uk,Intel Research Cambridge
> Copyright (c) 2014, Karim Allah Ahmed <karim.allah.ahmed at gmail.com>
> 
> Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko at epam.com>
> Signed-off-by: Anastasiia Lukianenko <anastasiia_lukianenko at epam.com>
> ---
>   arch/arm/include/asm/xen/system.h |  96 +++++++++++
>   common/board_r.c                  |  11 ++
>   drivers/Makefile                  |   1 +
>   drivers/xen/Makefile              |   5 +
>   drivers/xen/hypervisor.c          | 277 ++++++++++++++++++++++++++++++
>   include/xen.h                     |  11 ++
>   include/xen/hvm.h                 |  30 ++++
>   7 files changed, 431 insertions(+)
>   create mode 100644 arch/arm/include/asm/xen/system.h
>   create mode 100644 drivers/xen/Makefile
>   create mode 100644 drivers/xen/hypervisor.c
>   create mode 100644 include/xen.h
>   create mode 100644 include/xen/hvm.h
> 
> diff --git a/arch/arm/include/asm/xen/system.h b/arch/arm/include/asm/xen/system.h
> new file mode 100644
> index 0000000000..81ab90160e
> --- /dev/null
> +++ b/arch/arm/include/asm/xen/system.h
> @@ -0,0 +1,96 @@
> +/*
> + * SPDX-License-Identifier: GPL-2.0
> + *
> + * (C) 2014 Karim Allah Ahmed <karim.allah.ahmed at gmail.com>
> + * (C) 2020, EPAM Systems Inc.
> + */
> +#ifndef _ASM_ARM_XEN_SYSTEM_H
> +#define _ASM_ARM_XEN_SYSTEM_H
> +
> +#include <compiler.h>
> +#include <asm/bitops.h>
> +
> +/* If *ptr == old, then store new there (and return new).
> + * Otherwise, return the old value.
> + * Atomic.
> + */
> +#define synch_cmpxchg(ptr, old, new) \
> +({ __typeof__(*ptr) stored = old; \
> +	__atomic_compare_exchange_n(ptr, &stored, new, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) ? new : old; \
> +})
> +
> +/* As test_and_clear_bit, but using __ATOMIC_SEQ_CST */
> +static inline int synch_test_and_clear_bit(int nr, volatile void *addr)
> +{
> +	u8 *byte = ((u8 *)addr) + (nr >> 3);
> +	u8 bit = 1 << (nr & 7);
> +	u8 orig;
> +
> +	orig = __atomic_fetch_and(byte, ~bit, __ATOMIC_SEQ_CST);
> +
> +	return (orig & bit) != 0;
> +}
> +
> +/* As test_and_set_bit, but using __ATOMIC_SEQ_CST */
> +static inline int synch_test_and_set_bit(int nr, volatile void *base)
> +{
> +	u8 *byte = ((u8 *)base) + (nr >> 3);
> +	u8 bit = 1 << (nr & 7);
> +	u8 orig;
> +
> +	orig = __atomic_fetch_or(byte, bit, __ATOMIC_SEQ_CST);
> +
> +	return (orig & bit) != 0;
> +}
> +
> +/* As set_bit, but using __ATOMIC_SEQ_CST */
> +static inline void synch_set_bit(int nr, volatile void *addr)
> +{
> +	synch_test_and_set_bit(nr, addr);
> +}
> +
> +/* As clear_bit, but using __ATOMIC_SEQ_CST */
> +static inline void synch_clear_bit(int nr, volatile void *addr)
> +{
> +	synch_test_and_clear_bit(nr, addr);
> +}
> +
> +/* As test_bit, but with a following memory barrier. */
> +//static inline int synch_test_bit(int nr, volatile void *addr)
> +static inline int synch_test_bit(int nr, const void *addr)
> +{
> +	int result;
> +
> +	result = test_bit(nr, addr);
> +	barrier();
> +	return result;
> +}

I can understand why we implement sync_* helpers as AFAICT the generic 
helpers are not SMP safe. However...

> +
> +#define xchg(ptr, v)	__atomic_exchange_n(ptr, v, __ATOMIC_SEQ_CST)
> +#define xchg(ptr, v)	__atomic_exchange_n(ptr, v, __ATOMIC_SEQ_CST)
> +
> +#define mb()		dsb()
> +#define rmb()		dsb()
> +#define wmb()		dsb()
> +#define __iormb()	dmb()
> +#define __iowmb()	dmb()

Why do you need to re-implement the barriers?

> +#define xen_mb()	mb()
> +#define xen_rmb()	rmb()
> +#define xen_wmb()	wmb()
> +
> +#define smp_processor_id()	0
Shouldn't this be common?

> +
> +#define to_phys(x)		((unsigned long)(x))
> +#define to_virt(x)		((void *)(x))
> +
> +#define PFN_UP(x)		(unsigned long)(((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)
> +#define PFN_DOWN(x)		(unsigned long)((x) >> PAGE_SHIFT)
> +#define PFN_PHYS(x)		((unsigned long)(x) << PAGE_SHIFT)
> +#define PHYS_PFN(x)		(unsigned long)((x) >> PAGE_SHIFT)
> +
> +#define virt_to_pfn(_virt)	(PFN_DOWN(to_phys(_virt)))
> +#define virt_to_mfn(_virt)	(PFN_DOWN(to_phys(_virt)))
> +#define mfn_to_virt(_mfn)	(to_virt(PFN_PHYS(_mfn)))
> +#define pfn_to_virt(_pfn)	(to_virt(PFN_PHYS(_pfn)))

There is already generic phys <-> virt helpers (see 
include/asm-generic/io.h). So why do you need to create a new version?

> +
> +#endif
> diff --git a/common/board_r.c b/common/board_r.c
> index fa57fa9b69..fd36edb4e5 100644
> --- a/common/board_r.c
> +++ b/common/board_r.c
> @@ -56,6 +56,7 @@
>   #include <timer.h>
>   #include <trace.h>
>   #include <watchdog.h>
> +#include <xen.h>

Do we want to include it for other boards?

>   #ifdef CONFIG_ADDR_MAP
>   #include <asm/mmu.h>
>   #endif
> @@ -462,6 +463,13 @@ static int initr_mmc(void)
>   }
>   #endif
>   
> +#ifdef CONFIG_XEN
> +static int initr_xen(void)
> +{
> +	xen_init();
> +	return 0;
> +}
> +#endif
>   /*
>    * Tell if it's OK to load the environment early in boot.
>    *
> @@ -769,6 +777,9 @@ static init_fnc_t init_sequence_r[] = {
>   #endif
>   #ifdef CONFIG_MMC
>   	initr_mmc,
> +#endif
> +#ifdef CONFIG_XEN
> +	initr_xen,
>   #endif
>   	initr_env,
>   #ifdef CONFIG_SYS_BOOTPARAMS_LEN
> diff --git a/drivers/Makefile b/drivers/Makefile
> index 94e8c5da17..0dd8891e76 100644
> --- a/drivers/Makefile
> +++ b/drivers/Makefile
> @@ -28,6 +28,7 @@ obj-$(CONFIG_$(SPL_)REMOTEPROC) += remoteproc/
>   obj-$(CONFIG_$(SPL_TPL_)TPM) += tpm/
>   obj-$(CONFIG_$(SPL_TPL_)ACPI_PMC) += power/acpi_pmc/
>   obj-$(CONFIG_$(SPL_)BOARD) += board/
> +obj-$(CONFIG_XEN) += xen/
>   
>   ifndef CONFIG_TPL_BUILD
>   ifdef CONFIG_SPL_BUILD
> diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
> new file mode 100644
> index 0000000000..1211bf2386
> --- /dev/null
> +++ b/drivers/xen/Makefile
> @@ -0,0 +1,5 @@
> +# SPDX-License-Identifier:	GPL-2.0+
> +#
> +# (C) Copyright 2020 EPAM Systems Inc.
> +
> +obj-y += hypervisor.o
> diff --git a/drivers/xen/hypervisor.c b/drivers/xen/hypervisor.c
> new file mode 100644
> index 0000000000..5883285142
> --- /dev/null
> +++ b/drivers/xen/hypervisor.c
> @@ -0,0 +1,277 @@
> +/******************************************************************************
> + * hypervisor.c
> + *
> + * Communication to/from hypervisor.
> + *
> + * Copyright (c) 2002-2003, K A Fraser
> + * Copyright (c) 2005, Grzegorz Milos, gm281 at cam.ac.uk,Intel Research Cambridge
> + * Copyright (c) 2020, EPAM Systems Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to
> + * deal in the Software without restriction, including without limitation the
> + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
> + * sell copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> + * DEALINGS IN THE SOFTWARE.
> + */
> +#include <common.h>
> +#include <cpu_func.h>
> +#include <log.h>
> +#include <memalign.h>
> +
> +#include <asm/io.h>
> +#include <asm/armv8/mmu.h>
> +#include <asm/xen/system.h>
> +
> +#include <linux/bug.h>
> +
> +#include <xen/hvm.h>
> +#include <xen/interface/memory.h>
> +
> +#define active_evtchns(cpu, sh, idx)	\
> +	((sh)->evtchn_pending[idx] &	\
> +	 ~(sh)->evtchn_mask[idx])
> +
> +int in_callback;
> +
> +/*
> + * Shared page for communicating with the hypervisor.
> + * Events flags go here, for example.
> + */
> +struct shared_info *HYPERVISOR_shared_info;
> +
> +#ifndef CONFIG_PARAVIRT

Is there any plan to support this on x86?

> +static const char *param_name(int op)
> +{
> +#define PARAM(x)[HVM_PARAM_##x] = #x
> +	static const char *const names[] = {
> +		PARAM(CALLBACK_IRQ),
> +		PARAM(STORE_PFN),
> +		PARAM(STORE_EVTCHN),
> +		PARAM(PAE_ENABLED),
> +		PARAM(IOREQ_PFN),
> +		PARAM(TIMER_MODE),
> +		PARAM(HPET_ENABLED),
> +		PARAM(IDENT_PT),
> +		PARAM(ACPI_S_STATE),
> +		PARAM(VM86_TSS),
> +		PARAM(VPT_ALIGN),
> +		PARAM(CONSOLE_PFN),
> +		PARAM(CONSOLE_EVTCHN),

Most of those parameters are never going to be used on Arm. So could 
this be clobberred?

> +	};
> +#undef PARAM
> +
> +	if (op >= ARRAY_SIZE(names))
> +		return "unknown";
> +
> +	if (!names[op])
> +		return "reserved";
> +
> +	return names[op];
> +}
> +
> +int hvm_get_parameter_maintain_dcache(int idx, uint64_t *value)

I would recommend to add some comments explaining when this function is 
meant to be used and what it is doing in regards of the cache.

> +{
> +	struct xen_hvm_param xhv;
> +	int ret;

I don't think there is a guarantee that your cache is going to be clean 
when writing xhv. So you likely want to add a invalidate_dcache_range() 
before writing it.

> +
> +	xhv.domid = DOMID_SELF;
> +	xhv.index = idx;
> +	invalidate_dcache_range((unsigned long)&xhv,
> +				(unsigned long)&xhv + sizeof(xhv));
> +
> +	ret = HYPERVISOR_hvm_op(HVMOP_get_param, &xhv);
> +	if (ret < 0) {
> +		pr_err("Cannot get hvm parameter %s (%d): %d!\n",
> +			   param_name(idx), idx, ret);
> +		BUG();
> +	}
> +	invalidate_dcache_range((unsigned long)&xhv,
> +				(unsigned long)&xhv + sizeof(xhv));
> +
> +	*value = xhv.value;
> +	return ret;
> +}
> +
> +int hvm_get_parameter(int idx, uint64_t *value)
> +{
> +	struct xen_hvm_param xhv;
> +	int ret;
> +
> +	xhv.domid = DOMID_SELF;
> +	xhv.index = idx;
> +	ret = HYPERVISOR_hvm_op(HVMOP_get_param, &xhv);
> +	if (ret < 0) {
> +		pr_err("Cannot get hvm parameter %s (%d): %d!\n",
> +			   param_name(idx), idx, ret);
> +		BUG();
> +	}
> +
> +	*value = xhv.value;
> +	return ret;
> +}
> +
> +int hvm_set_parameter(int idx, uint64_t value)
> +{
> +	struct xen_hvm_param xhv;
> +	int ret;
> +
> +	xhv.domid = DOMID_SELF;
> +	xhv.index = idx;
> +	xhv.value = value;
> +	ret = HYPERVISOR_hvm_op(HVMOP_set_param, &xhv);
> +
> +	if (ret < 0) {
> +		pr_err("Cannot get hvm parameter %s (%d): %d!\n",
> +			   param_name(idx), idx, ret);
> +		BUG();
> +	}
> +
> +	return ret;
> +}
> +
> +struct shared_info *map_shared_info(void *p)
> +{
> +	struct xen_add_to_physmap xatp;
> +
> +	HYPERVISOR_shared_info = (struct shared_info *)memalign(PAGE_SIZE,
> +								PAGE_SIZE);
> +	if (HYPERVISOR_shared_info == NULL)
> +		BUG();
> +
> +	xatp.domid = DOMID_SELF;
> +	xatp.idx = 0;
> +	xatp.space = XENMAPSPACE_shared_info;
> +	xatp.gpfn = virt_to_pfn(HYPERVISOR_shared_info);
> +	if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp) != 0)
> +		BUG();
> +
> +	return HYPERVISOR_shared_info;
> +}
> +
> +void unmap_shared_info(void)
> +{
> +	struct xen_remove_from_physmap xrtp;
> +
> +	xrtp.domid = DOMID_SELF;
> +	xrtp.gpfn = virt_to_pfn(HYPERVISOR_shared_info);
> +	if (HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrtp) != 0)
> +		BUG();
> +}
> +#endif
> +
> +void do_hypervisor_callback(struct pt_regs *regs)
> +{
> +	unsigned long l1, l2, l1i, l2i;
> +	unsigned int port;
> +	int cpu = 0;
> +	struct shared_info *s = HYPERVISOR_shared_info;
> +	struct vcpu_info *vcpu_info = &s->vcpu_info[cpu];
> +
> +	in_callback = 1;
> +
> +	vcpu_info->evtchn_upcall_pending = 0;
> +	/* NB x86. No need for a barrier here -- XCHG is a barrier on x86. */
> +#if !defined(__i386__) && !defined(__x86_64__)
> +	/* Clear master flag /before/ clearing selector flag. */
> +	wmb();
> +#endif
> +	l1 = xchg(&vcpu_info->evtchn_pending_sel, 0);
> +
> +	while (l1 != 0) {
> +		l1i = __ffs(l1);
> +		l1 &= ~(1UL << l1i);
> +
> +		while ((l2 = active_evtchns(cpu, s, l1i)) != 0) {
> +			l2i = __ffs(l2);
> +			l2 &= ~(1UL << l2i);
> +
> +			port = (l1i * (sizeof(unsigned long) * 8)) + l2i;
> +			/* TODO: handle new event: do_event(port, regs); */
> +			/* Suppress -Wunused-but-set-variable */
> +			(void)(port);
> +		}
> +	}

You likely want a memory barrier here as otherwise in_callback could be 
written/seen before the loop end.

> +
> +	in_callback = 0;
> +}
> +
> +void force_evtchn_callback(void)
> +{
> +#ifdef XEN_HAVE_PV_UPCALL_MASK
> +	int save;
> +#endif
> +	struct vcpu_info *vcpu;
> +
> +	vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()];

On Arm, this is only valid for vCPU0. For all the other vCPUs, you will 
want to register a vCPU shared info.

> +#ifdef XEN_HAVE_PV_UPCALL_MASK
> +	save = vcpu->evtchn_upcall_mask;
> +#endif
> +
> +	while (vcpu->evtchn_upcall_pending) {
> +#ifdef XEN_HAVE_PV_UPCALL_MASK
> +		vcpu->evtchn_upcall_mask = 1;
> +#endif
> +		barrier();

What are you trying to prevent with this barrier? In particular why 
would the compiler be an issue but not the processor?

> +		do_hypervisor_callback(NULL);
> +		barrier();
> +#ifdef XEN_HAVE_PV_UPCALL_MASK
> +		vcpu->evtchn_upcall_mask = save;
> +		barrier();

Same here.

> +#endif
> +	};
> +}
> +
> +void mask_evtchn(uint32_t port)
> +{
> +	struct shared_info *s = HYPERVISOR_shared_info;
> +	synch_set_bit(port, &s->evtchn_mask[0]);
> +}
> +
> +void unmask_evtchn(uint32_t port)
> +{
> +	struct shared_info *s = HYPERVISOR_shared_info;
> +	struct vcpu_info *vcpu_info = &s->vcpu_info[smp_processor_id()];
> +
> +	synch_clear_bit(port, &s->evtchn_mask[0]);
> +
> +	/*
> +	 * The following is basically the equivalent of 'hw_resend_irq'. Just like
> +	 * a real IO-APIC we 'lose the interrupt edge' if the channel is masked.
> +	 */
This seems to be out-of-context now, you might want to update it.

> +	if (synch_test_bit(port, &s->evtchn_pending[0]) &&
> +	    !synch_test_and_set_bit(port / (sizeof(unsigned long) * 8),
> +				    &vcpu_info->evtchn_pending_sel)) {
> +		vcpu_info->evtchn_upcall_pending = 1;
> +#ifdef XEN_HAVE_PV_UPCALL_MASK
> +		if (!vcpu_info->evtchn_upcall_mask)
> +#endif
> +			force_evtchn_callback();
> +	}
> +}
> +
> +void clear_evtchn(uint32_t port)
> +{
> +	struct shared_info *s = HYPERVISOR_shared_info;
> +
> +	synch_clear_bit(port, &s->evtchn_pending[0]);
> +}
> +
> +void xen_init(void)
> +{
> +	debug("%s\n", __func__);

Is this a left-over?

> +
> +	map_shared_info(NULL);
> +}
> +
> diff --git a/include/xen.h b/include/xen.h
> new file mode 100644
> index 0000000000..1d6f74cc92
> --- /dev/null
> +++ b/include/xen.h
> @@ -0,0 +1,11 @@
> +/*
> + * SPDX-License-Identifier: GPL-2.0
> + *
> + * (C) 2020, EPAM Systems Inc.
> + */
> +#ifndef __XEN_H__
> +#define __XEN_H__
> +
> +void xen_init(void);
> +
> +#endif /* __XEN_H__ */
> diff --git a/include/xen/hvm.h b/include/xen/hvm.h
> new file mode 100644
> index 0000000000..89de9625ca
> --- /dev/null
> +++ b/include/xen/hvm.h
> @@ -0,0 +1,30 @@
> +/*
> + * SPDX-License-Identifier: GPL-2.0
> + *
> + * Simple wrappers around HVM functions
> + *
> + * Copyright (c) 2002-2003, K A Fraser
> + * Copyright (c) 2005, Grzegorz Milos, gm281 at cam.ac.uk,Intel Research Cambridge
> + * Copyright (c) 2020, EPAM Systems Inc.
> + */
> +#ifndef XEN_HVM_H__
> +#define XEN_HVM_H__
> +
> +#include <asm/xen/hypercall.h>
> +#include <xen/interface/hvm/params.h>
> +#include <xen/interface/xen.h>
> +
> +extern struct shared_info *HYPERVISOR_shared_info;
> +
> +int hvm_get_parameter(int idx, uint64_t *value);
> +int hvm_get_parameter_maintain_dcache(int idx, uint64_t *value);
> +int hvm_set_parameter(int idx, uint64_t value);
> +
> +struct shared_info *map_shared_info(void *p);
> +void unmap_shared_info(void);
> +void do_hypervisor_callback(struct pt_regs *regs);
> +void mask_evtchn(uint32_t port);
> +void unmask_evtchn(uint32_t port);
> +void clear_evtchn(uint32_t port);
> +
> +#endif /* XEN_HVM_H__ */

Cheers,

-- 
Julien Grall