[U-Boot] [PATCH] ARC: HSDK: add platform-specific commands

Thu Mar 22 18:43:44 UTC 2018

Hi Eugeniy,

To start from your patch doesn't apply on u-boot-arc/next:
---------------------------------->8-----------------------------
git am \[PATCH\]_ARC\:_HSDK\:_add_platform-specific_commands.mbox
Applying: ARC: HSDK: add platform-specific commands
/home/abrodkin/Projects/sources/git/u-boot/.git/worktrees/tmp-mainline-test/rebase-apply/patch:868: space before tab in indent.
	 			: /* no output */
error: patch failed: board/synopsys/hsdk/hsdk.c:1
error: board/synopsys/hsdk/hsdk.c: patch does not apply
Patch failed at 0001 ARC: HSDK: add platform-specific commands
The copy of the patch that failed is found in: /home/abrodkin/Projects/sources/git/u-boot/.git/worktrees/tmp-mainline-test/rebase-apply/patch
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".
# git am --abort 
# git s
On branch staging-arc
Your branch is up-to-date with 'u-boot-arc/next'.

Untracked files:
  (use "git add <file>..." to include in what will be committed)

	[PATCH]_ARC:_HSDK:_add_platform-specific_commands.mbox

nothing added to commit but untracked files present (use "git add" to track)
---------------------------------->8-----------------------------

I had to apply it manually, remove stuff related to hsdk.c and rm hsdk.c.
Care to check what's wrong there.
Also patch complained a lot on trailing CRs:
---------------------------------->8-----------------------------
(Stripping trailing CRs from patch; use --binary to disable.)
patching file arch/arc/dts/hsdk.dts
(Stripping trailing CRs from patch; use --binary to disable.)
patching file board/synopsys/hsdk/MAINTAINERS
(Stripping trailing CRs from patch; use --binary to disable.)
patching file board/synopsys/hsdk/Makefile
(Stripping trailing CRs from patch; use --binary to disable.)
patching file board/synopsys/hsdk/clk-lib.c
(Stripping trailing CRs from patch; use --binary to disable.)
patching file board/synopsys/hsdk/clk-lib.h
(Stripping trailing CRs from patch; use --binary to disable.)
patching file board/synopsys/hsdk/env-lib.c
(Stripping trailing CRs from patch; use --binary to disable.)
patching file board/synopsys/hsdk/env-lib.h
(Stripping trailing CRs from patch; use --binary to disable.)
patching file board/synopsys/hsdk/hsdk-cmd.c
(Stripping trailing CRs from patch; use --binary to disable.)
---------------------------------->8-----------------------------

Also there're obvious extra spaces, did you run checkpatch?
I guess no:
---------------------------------->8-----------------------------
total: 5 errors, 65 warnings, 11 checks, 1619 lines checked
---------------------------------->8-----------------------------

On Wed, 2018-03-21 at 21:46 +0300, Eugeniy Paltsev wrote:
> Signed-off-by: Eugeniy Paltsev <Eugeniy.Paltsev at synopsys.com>
> ---
>  arch/arc/dts/hsdk.dts           |  56 +++
>  board/synopsys/hsdk/MAINTAINERS |   6 +-
>  board/synopsys/hsdk/Makefile    |   4 +-
>  board/synopsys/hsdk/clk-lib.c   |  68 +++
>  board/synopsys/hsdk/clk-lib.h   |  17 +
>  board/synopsys/hsdk/env-lib.c   | 295 ++++++++++++
>  board/synopsys/hsdk/env-lib.h   |  51 ++
>  board/synopsys/hsdk/hsdk-cmd.c  | 995 ++++++++++++++++++++++++++++++++++++++++
>  board/synopsys/hsdk/hsdk.c      |  95 ----
>  configs/hsdk_defconfig          |  15 +
>  include/configs/hsdk.h          |  38 +-
>  11 files changed, 1538 insertions(+), 102 deletions(-)
>  create mode 100644 board/synopsys/hsdk/clk-lib.c
>  create mode 100644 board/synopsys/hsdk/clk-lib.h
>  create mode 100644 board/synopsys/hsdk/env-lib.c
>  create mode 100644 board/synopsys/hsdk/env-lib.h
>  create mode 100644 board/synopsys/hsdk/hsdk-cmd.c
>  delete mode 100644 board/synopsys/hsdk/hsdk.c
> 
> diff --git a/arch/arc/dts/hsdk.dts b/arch/arc/dts/hsdk.dts
> index 67dfb93ca8..d9151ed646 100644
> --- a/arch/arc/dts/hsdk.dts
> +++ b/arch/arc/dts/hsdk.dts
> @@ -6,6 +6,7 @@
>  /dts-v1/;
>  
>  #include "skeleton.dtsi"
> +#include "dt-bindings/clock/snps,hsdk-cgu.h"
>  
>  / {
>  	#address-cells = <1>;
> @@ -13,6 +14,7 @@
>  
>  	aliases {
>  		console = &uart0;
> +		spi0 = "/spi at f0020000";

Maybe spi0 = &spi0?

...
	spi0: spi at f0020000 {
...

>  	};
>  
>  	cpu_card {
> @@ -24,6 +26,35 @@
>  		};
>  	};
>  
> +	clk-fmeas {
> +		clocks = <&cgu_clk CLK_ARC_PLL>, <&cgu_clk CLK_SYS_PLL>,
> +			 <&cgu_clk CLK_TUN_PLL>, <&cgu_clk CLK_DDR_PLL>,
> +			 <&cgu_clk CLK_ARC>, <&cgu_clk CLK_HDMI_PLL>,
> +			 <&cgu_clk CLK_TUN_TUN>, <&cgu_clk CLK_HDMI>,
> +			 <&cgu_clk CLK_SYS_APB>, <&cgu_clk CLK_SYS_AXI>,
> +			 <&cgu_clk CLK_SYS_ETH>, <&cgu_clk CLK_SYS_USB>,
> +			 <&cgu_clk CLK_SYS_SDIO>, <&cgu_clk CLK_SYS_HDMI>,
> +			 <&cgu_clk CLK_SYS_GFX_CORE>, <&cgu_clk CLK_SYS_GFX_DMA>,
> +			 <&cgu_clk CLK_SYS_GFX_CFG>, <&cgu_clk CLK_SYS_DMAC_CORE>,
> +			 <&cgu_clk CLK_SYS_DMAC_CFG>, <&cgu_clk CLK_SYS_SDIO_REF>,
> +			 <&cgu_clk CLK_SYS_SPI_REF>, <&cgu_clk CLK_SYS_I2C_REF>,
> +			 <&cgu_clk CLK_SYS_UART_REF>, <&cgu_clk CLK_SYS_EBI_REF>,
> +			 <&cgu_clk CLK_TUN_ROM>, <&cgu_clk CLK_TUN_PWM>;
> +		clock-names = "cpu-pll", "sys-pll",
> +			      "tun-pll", "ddr-clk",
> +			      "cpu-clk", "hdmi-pll",
> +			      "tun-clk", "hdmi-clk",
> +			      "apb-clk", "axi-clk",
> +			      "eth-clk", "usb-clk",
> +			      "sdio-clk", "hdmi-sys-clk",
> +			      "gfx-core-clk", "gfx-dma-clk",
> +			      "gfx-cfg-clk", "dmac-core-clk",
> +			      "dmac-cfg-clk", "sdio-ref-clk",
> +			      "spi-clk", "i2c-clk",
> +			      "uart-clk", "ebi-clk",
> +			      "rom-clk", "pwm-clk";
> +	};
> +
>  	cgu_clk: cgu-clk at f0000000 {
>  		compatible = "snps,hsdk-cgu-clock";
>  		reg = <0xf0000000 0x10>, <0xf00014B8 0x4>;
> @@ -53,4 +84,29 @@
>  		compatible = "generic-ohci";
>  		reg = <0xf0060000 0x100>;
>  	};
> +
> +	spi at f0020000 {
> +		compatible = "snps,dw-apb-ssi";
> +		reg = <0xf0020000 0x1000>;
> +		#address-cells = <1>;
> +		#size-cells = <0>;
> +		spi-max-frequency = <4000000>;
> +		clocks = <&cgu_clk CLK_SYS_SPI_REF>;
> +		clock-names = "spi_clk";
> +		cs-gpio = <&cs_gpio 0>;
> +		spi_flash at 0 {
> +			compatible = "spi-flash";
> +			reg = <0>;
> +			spi-max-frequency = <4000000>;
> +		};
> +	};
> +
> +	cs_gpio: gpio at f00114B0 {
> +		compatible = "snps,hsdk-creg-gpio";
> +		reg = <0xf00014B0 0x4>;
> +		gpio-controller;
> +		#gpio-cells = <1>;
> +		gpio-bank-name = "hsdk-spi-cs";
> +		gpio-count = <1>;
> +	};
>  };
> diff --git a/board/synopsys/hsdk/MAINTAINERS b/board/synopsys/hsdk/MAINTAINERS
> index d034bc479d..8c02f52976 100644
> --- a/board/synopsys/hsdk/MAINTAINERS
> +++ b/board/synopsys/hsdk/MAINTAINERS
> @@ -1,5 +1,5 @@
> -AXS10X BOARD
> -M:	Alexey Brodkin <abrodkin at synopsys.com>
> +HSDK BOARD
> +M:	Eugeniy Paltsev <paltsev at synopsys.com>
>  S:	Maintained
> -F:	board/synopsys/hsdk/
> +F:	board/synopsys/hsdk-cmd/

"hsdk-cmd" is a folder?
You need to keep "board/synopsys/hsdk/" as a folder with everything.

>  F:	configs/hsdk_defconfig
> diff --git a/board/synopsys/hsdk/Makefile b/board/synopsys/hsdk/Makefile
> index d84dd03265..390e429557 100644
> --- a/board/synopsys/hsdk/Makefile
> +++ b/board/synopsys/hsdk/Makefile
> @@ -4,4 +4,6 @@
>  # SPDX-License-Identifier:	GPL-2.0+
>  #
>  
> -obj-y	+= hsdk.o
> +obj-y	+= hsdk-cmd.o
> +obj-y	+= env-lib.o
> +obj-y	+= clk-lib.o
> diff --git a/board/synopsys/hsdk/clk-lib.c b/board/synopsys/hsdk/clk-lib.c
> new file mode 100644
> index 0000000000..a2a9cb17f3
> --- /dev/null
> +++ b/board/synopsys/hsdk/clk-lib.c

No copyright here?

> @@ -0,0 +1,68 @@
> +#include <clk.h>
> +#include <dm/device.h>
> +
> +#include "clk-lib.h"
> +
> +#define HZ_IN_MHZ	1000000
> +#define ceil(x, y)	({ ulong __x = (x), __y = (y); (__x + __y - 1) / __y; })
> +

Please add a comment here with description of the below function.
What does it do, why we don't have normal clock driver in "drivers/clk" folder,
and what kind of usage of that function we expect to have.

> +int soc_clk_ctl(const char *name, ulong *rate, enum clk_ctl_ops ctl)
> +{
> +	int ret;
> +	ulong mhz_rate, priv_rate;
> +	struct clk clk;
> +
> +	/* Dummy fmeas device, just to be able to use standard clk_* api funcs */
> +	struct udevice fmeas = {
> +		.name = "clk-fmeas",
> +		.node = ofnode_path("/clk-fmeas"),
> +	};
> +
> +	ret = clk_get_by_name(&fmeas, name, &clk);
> +	if (ret) {
> +		pr_err("clock '%s' not found, err=%d\n", name, ret);
> +		return ret;
> +	}
> +
> +	if (ctl & CLK_ON) {
> +		ret = clk_enable(&clk);
> +		if (ret && ret != -ENOSYS && ret != -ENOTSUPP)
> +			return ret;

What happens if ret == -ENOSYS or -ENOTSUPP?
It worth adding comments here because not everybody knows that
and this code looks strange as normally we bail on any error like:

	if (ret)
		return ret;

> +	}
> +
> +	if ((ctl & CLK_SET) && (rate != NULL)) {
> +		priv_rate = ctl & CLK_MHZ ? (*rate) * HZ_IN_MHZ : *rate;
> +		ret = clk_set_rate(&clk, priv_rate);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	if (ctl & CLK_OFF) {
> +		ret = clk_disable(&clk);
> +		if (ret) {

Why don't we check for -ENOSYS here as a plausible result?

> +			pr_err("clock '%s' can't disable, err=%d\n", name, ret);

"can't be disabled"

> +			return ret;
> +		}
> +	}
> +
> +	priv_rate = clk_get_rate(&clk);
> +
> +	clk_free(&clk);
> +
> +	mhz_rate = ceil(priv_rate, HZ_IN_MHZ);
> +
> +	if (ctl & CLK_MHZ)
> +		priv_rate = mhz_rate;
> +
> +	if ((ctl & CLK_GET) && (rate != NULL))
> +		*rate = priv_rate;
> +
> +	if ((ctl & CLK_PRINT) && (ctl & CLK_MHZ))
> +		printf("HSDK: clock '%s' rate %lu MHz\n", name, priv_rate);
> +	else if (ctl & CLK_PRINT)
> +		printf("HSDK: clock '%s' rate %lu Hz\n", name, priv_rate);
> +	else
> +		debug("HSDK: clock '%s' rate %lu MHz\n", name, mhz_rate);
> +
> +	return 0;
> +}
> diff --git a/board/synopsys/hsdk/clk-lib.h b/board/synopsys/hsdk/clk-lib.h
> new file mode 100644
> index 0000000000..6fc4c2a21e
> --- /dev/null
> +++ b/board/synopsys/hsdk/clk-lib.h
> @@ -0,0 +1,17 @@
> +#ifndef __BOARD_CLK_LIB_H
> +#define __BOARD_CLK_LIB_H
> +
> +#include <common.h>
> +
> +enum clk_ctl_ops {
> +	CLK_SET		= BIT(0), /* set frequency */
> +	CLK_GET		= BIT(1), /* get frequency */
> +	CLK_ON		= BIT(2), /* enable clock */
> +	CLK_OFF		= BIT(3), /* disable clock */
> +	CLK_PRINT	= BIT(4), /* print frequency */
> +	CLK_MHZ		= BIT(5)  /* all values in MHZ instead of HZ */
> +};
> +
> +int soc_clk_ctl(const char *name, ulong *rate, enum clk_ctl_ops ctl);
> +
> +#endif /* __BOARD_CLK_LIB_H */
> diff --git a/board/synopsys/hsdk/env-lib.c b/board/synopsys/hsdk/env-lib.c
> new file mode 100644
> index 0000000000..00aab0fccf
> --- /dev/null
> +++ b/board/synopsys/hsdk/env-lib.c
> @@ -0,0 +1,295 @@
> +#include "env-lib.h"
> +
> +#define MAX_CMD_LEN	25
> +

Please describe functions below as well.
There're a lot of very similarly names functions and it's not clear
what is a difference between them and what they are supposed to do.

This becomes even more important for ones in the bottom of the file
which do complex things executing inside many previously defined funcs.

> +static void env_clear_common(u32 index, const struct env_map_common *map)
> +{
> +	map[index].val->val = 0;
> +	map[index].val->set = false;
> +}
> +
> +static int env_read_common(u32 index, const struct env_map_common *map)
> +{
> +	u32 val;
> +
> +	if (!env_get_yesno(map[index].env_name)) {
> +		if (map[index].type == ENV_HEX) {
> +			val = (u32)env_get_hex(map[index].env_name, 0);
> +			debug("ENV: %s: = %#x\n", map[index].env_name, val);
> +		} else {
> +			val = (u32)env_get_ulong(map[index].env_name, 10, 0);
> +			debug("ENV: %s: = %d\n", map[index].env_name, val);
> +		}
> +
> +		map[index].val->val = val;
> +		map[index].val->set = true;
> +	}
> +
> +	return 0;
> +}

[snip]

> diff --git a/board/synopsys/hsdk/hsdk-cmd.c b/board/synopsys/hsdk/hsdk-cmd.c
> new file mode 100644
> index 0000000000..f329a53b97
> --- /dev/null
> +++ b/board/synopsys/hsdk/hsdk-cmd.c

I don't really like to merge hsdk.c into hsdk-cmd.c.
The latter is already huge so let's keep command in a separate file
while platform code keep in hsdk.c.

> @@ -0,0 +1,995 @@
> +#include <common.h>
> +#include <config.h>
> +#include <linux/printk.h>
> +#include <linux/kernel.h>
> +#include <linux/io.h>
> +#include <asm/arcregs.h>
> +#include <fdt_support.h>
> +#include <dwmmc.h>
> +#include <malloc.h>
> +#include <usb.h>
> +
> +#include "clk-lib.h"
> +#include "env-lib.h"
> +
> +DECLARE_GLOBAL_DATA_PTR;
> +
> +#define ALL_CPU_MASK		GENMASK(NR_CPUS - 1, 0)
> +#define MASTER_CPU_ID		0
> +#define APERTURE_SHIFT		28
> +#define NO_CCM			0x10
> +
> +#define RESET_VECTOR_ADDR	0x0
> +
> +#define	CREG_BASE		(ARC_PERIPHERAL_BASE + 0x1000)
> +#define	CREG_CPU_START		(CREG_BASE + 0x400)
> +#define	CREG_CPU_START_MASK	0xF

Why using tab above while in all other places it's a space?
Even if that was the case with initial code pls fix it when you move it.
Otherwise if we keep it where it was in hsdk.c let's not touch that for now
for the sake of clarity.

> +
> +#define SDIO_BASE		(ARC_PERIPHERAL_BASE + 0xA000)
> +#define SDIO_UHS_REG_EXT	(SDIO_BASE + 0x108)
> +#define SDIO_UHS_REG_EXT_DIV_2	(2 << 30)
> +
> +/* Uncached access macros */
> +#define arc_read_uncached_32(ptr)	\
> +({					\
> +	unsigned int __ret;		\
> +	__asm__ __volatile__(		\
> +	"	ld.di %0, [%1]	\n"	\
> +	: "=r"(__ret)			\
> +	: "r"(ptr));			\
> +	__ret;				\
> +})
> +
> +#define arc_write_uncached_32(ptr, data)\
> +({					\
> +	__asm__ __volatile__(		\
> +	"	st.di %0, [%1]	\n"	\
> +	:				\
> +	: "r"(data), "r"(ptr));		\
> +})
> +
> +struct hsdk_env_core_ctl {
> +	u32_env entry[NR_CPUS];
> +	u32_env iccm[NR_CPUS];
> +	u32_env dccm[NR_CPUS];
> +};
> +
> +struct hsdk_env_common_ctl {
> +	bool halt_on_boot;
> +	u32_env core_mask;
> +	u32_env cpu_freq;
> +	u32_env axi_freq;
> +	u32_env tun_freq;
> +	u32_env nvlim;
> +	u32_env icache;
> +	u32_env dcache;
> +};
> +
> +/* Uncached cross-cpu structure */
> +struct hsdk_cross_cpu {
> +	/* cross_cpu flags */
> +	u32 data_flag;
> +	u32 stack_ptr;
> +	s32 status[NR_CPUS];
> +
> +	/* cross_cpu data */
> +	u32 entry[NR_CPUS];
> +	u32 iccm[NR_CPUS];
> +	u32 dccm[NR_CPUS];
> +
> +	u32 core_mask;
> +	u32 icache;
> +	u32 dcache;
> +
> +	u8 cache_padding[ARCH_DMA_MINALIGN];
> +} __aligned(ARCH_DMA_MINALIGN);

Please document all members of those structures as it's not clear
what data goes there.

> +/* Place for slave cpu temporary stack */
> +static u32 slave_stack[256 * NR_CPUS] __aligned(4);

Why not usual DMA_MINALIGN?

> +static struct hsdk_env_common_ctl env_common = {};
> +static struct hsdk_env_core_ctl env_core = {};
> +static struct hsdk_cross_cpu cross_cpu_data;
> +
> +static const struct env_map_common env_map_common[] = {
> +	{ "core_mask",	ENV_HEX, true,	0x1, 0xF,	&env_common.core_mask },
> +	{ "non_volatile_limit", ENV_HEX, true, 0, 0xF,	&env_common.nvlim },
> +	{ "icache_ena",	ENV_HEX, true,	0, 1,		&env_common.icache },
> +	{ "dcache_ena",	ENV_HEX, true,	0, 1,		&env_common.dcache },
> +	{}
> +};
> +
> +static const struct env_map_common env_map_clock[] = {
> +	{ "cpu_freq",	ENV_DEC, false,	100, 1000,	&env_common.cpu_freq },
> +	{ "axi_freq",	ENV_DEC, false,	200, 800,	&env_common.axi_freq },
> +	{ "tun_freq",	ENV_DEC, false,	0, 150,		&env_common.tun_freq },
> +	{}
> +};
> +
> +static const struct env_map_percpu env_map_core[] = {
> +	{ "core_iccm", ENV_HEX, true, {NO_CCM, 0, NO_CCM, 0}, {NO_CCM, 0xF, NO_CCM, 0xF}, &env_core.iccm },
> +	{ "core_dccm", ENV_HEX, true, {NO_CCM, 0, NO_CCM, 0}, {NO_CCM, 0xF, NO_CCM, 0xF}, &env_core.dccm },
> +	{}
> +};
> +
> +static const struct env_map_common env_map_mask[] = {
> +	{ "core_mask",	ENV_HEX, false,	0x1, 0xF,	&env_common.core_mask },
> +	{}
> +};
> +
> +static const struct env_map_percpu env_map_go[] = {
> +	{ "core_entry", ENV_HEX, true, {0, 0, 0, 0}, {U32_MAX, U32_MAX, U32_MAX, U32_MAX}, &env_core.entry },
> +	{}
> +};

I'd add an explanation why in some cases we used uncached access and in some cases
we mess with caches. Otherwise it's unclear why we mix different ways to access data
which in general is very dangerous.

Moreover I'd introduce special accessors for dealing with data bypassing cache just to make
sure the same data never gets accessed in both cacahed and uncached way.
I understand that sound as an overkill but it might save us quite some time later if something
goes south.

> +static void sync_cross_cpu_data(void)
> +{
> +	u32 value;
> +
> +	for (u32 i = 0; i < NR_CPUS; i++) {
> +		value = env_core.entry[i].val;
> +		arc_write_uncached_32(&cross_cpu_data.entry[i], value);
> +	}
> +
> +	for (u32 i = 0; i < NR_CPUS; i++) {
> +		value = env_core.iccm[i].val;
> +		arc_write_uncached_32(&cross_cpu_data.iccm[i], value);
> +	}
> +
> +	for (u32 i = 0; i < NR_CPUS; i++) {
> +		value = env_core.dccm[i].val;
> +		arc_write_uncached_32(&cross_cpu_data.dccm[i], value);
> +	}
> +
> +	value = env_common.core_mask.val;
> +	arc_write_uncached_32(&cross_cpu_data.core_mask, value);
> +
> +	value = env_common.icache.val;
> +	arc_write_uncached_32(&cross_cpu_data.icache, value);
> +
> +	value = env_common.dcache.val;
> +	arc_write_uncached_32(&cross_cpu_data.dcache, value);
> +}
> +
> +static bool is_cpu_used(u32 cpu_id)
> +{
> +	return !!(env_common.core_mask.val & BIT(cpu_id));
> +}
> +
> +/* TODO: add ICCM and DCCM runtime check */
> +static void smp_init_slave_cpu_func(u32 core)

Why smp_xxx_func(), maybe just "init_slave_cpu()"?

> +{
> +	u32 val;
> +
> +	/* ICCM move if exists */
> +	val = arc_read_uncached_32(&cross_cpu_data.iccm[core]);
> +	if (val != NO_CCM)
> +		write_aux_reg(ARC_AUX_ICCM_BASE, val << APERTURE_SHIFT);
> +
> +	/* DCCM move if exists */
> +	val = arc_read_uncached_32(&cross_cpu_data.dccm[core]);
> +	if (val != NO_CCM)
> +		write_aux_reg(ARC_AUX_DCCM_BASE, val << APERTURE_SHIFT);
> +
> +	if (arc_read_uncached_32(&cross_cpu_data.icache))
> +		icache_enable();
> +	else
> +		icache_disable();
> +
> +	if (arc_read_uncached_32(&cross_cpu_data.dcache))
> +		dcache_enable();
> +	else
> +		dcache_disable();
> +}

[snip]

> +/* slave CPU entry for configuration */
> +__attribute__((naked, noreturn, flatten)) noinline void hsdk_core_init_f(void)
> +{
> +	__asm__ __volatile__(	"ld.di	r8,	[%0]\n"
> +				"mov	%%sp,	r8\n"
> +				"mov	%%fp,	%%sp\n"
> +	 			: /* no output */
> +				: "r" (&cross_cpu_data.stack_ptr));
> +
> +	invalidate_icache_all();
> +
> +	arc_write_uncached_32(&cross_cpu_data.status[CPU_ID_GET()], 1);
> +	smp_init_slave_cpu_func(CPU_ID_GET());
> +
> +	arc_write_uncached_32(&cross_cpu_data.data_flag, 0x12345678);

Magic numbers [0x12345678, 1-5]?

If 1-5 are stages of slave core init process maybe use more human-readable defines
instead? This will help significantly to the next person who's going to debug it
as code becomes much more readable.

> +	arc_write_uncached_32(&cross_cpu_data.status[CPU_ID_GET()], 2);
> +
> +	/* Halt the processor untill the master kick us again */
> +	halt_this_cpu();
> +
> +	__builtin_arc_nop();
> +	__builtin_arc_nop();
> +	__builtin_arc_nop();

NOPs are no longer required for ARCv2 cores.

> +
> +	arc_write_uncached_32(&cross_cpu_data.status[CPU_ID_GET()], 3);
> +
> +	/* get the updated entry - invalidate i$ */
> +	invalidate_icache_all();
> +
> +	arc_write_uncached_32(&cross_cpu_data.status[CPU_ID_GET()], 4);
> +
> +	/* Run our program */
> +	((void (*)(void))(arc_read_uncached_32(&cross_cpu_data.entry[CPU_ID_GET()])))();
> +
> +	arc_write_uncached_32(&cross_cpu_data.status[CPU_ID_GET()], 5);
> +
> +	/* Something went terribly wrong */
> +	while (true)
> +		halt_this_cpu();
> +}
> +
> +static void clear_cross_cpu_data(void)
> +{
> +	arc_write_uncached_32(&cross_cpu_data.data_flag, 0);
> +	arc_write_uncached_32(&cross_cpu_data.stack_ptr, 0);
> +
> +	for (u32 i = 0; i < NR_CPUS; i++)
> +		arc_write_uncached_32(&cross_cpu_data.status[i], 0);
> +}
> +
> +noinline static void do_init_slave_cpu(u32 cpu_id)
> +{
> +	u32 timeout = 100;

What units are we dealing here?

> +	/* TODO: optimize memory usage, now we have one unused aperture */

Makes no sense to me. If that's really important pls add more details here
so others may understand that and do it later.

> +	u32 stack_ptr = (u32)(slave_stack + (64 * cpu_id));
> +
> +	if (cpu_id >= NR_CPUS)
> +		return;
> +
> +	arc_write_uncached_32(&cross_cpu_data.data_flag, 0);
> +
> +	/* Use global unic place for slave cpu stack */

unic -> unique

> +	arc_write_uncached_32(&cross_cpu_data.stack_ptr, stack_ptr);
> +
> +	debug("CPU %u: stack pool base: %p\n", cpu_id, slave_stack);
> +	debug("CPU %u: current slave stack base: %x\n", cpu_id, stack_ptr);
> +	slave_cpu_set_boot_addr((u32)hsdk_core_init_f);
> +
> +	smp_kick_cpu_x(cpu_id);
> +
> +	debug("CPU %u: cross-cpu flag: %x [before timeout]\n", cpu_id,
> +	      arc_read_uncached_32(&cross_cpu_data.data_flag));
> +
> +	while (!arc_read_uncached_32(&cross_cpu_data.data_flag) && timeout--)
> +		mdelay(10);
> +
> +	/* Just to be sure that slave cpu is halted after it set data_flag */
> +	mdelay(20);
> +
> +	/*
> +	 * We need to panic here as there is no option to halt slave cpu
> +	 * (or check that slave cpu is halted)
> +	 */

Even though that might sound strange but I'd move that comment below
if (!timeout) and add {} around it. Otherwise reader first gets an impression that
we're going to panic unconditionally. And BTW where's panic()?

> +	if (!timeout)
> +		pr_err("CPU %u is not responding after init!\n", cpu_id);
> +
> +	/* Check current stage of slave cpu */
> +	if (arc_read_uncached_32(&cross_cpu_data.status[cpu_id]) != 2)
> +		pr_err("CPU %u status is unexpected: %d\n", cpu_id,
> +		       arc_read_uncached_32(&cross_cpu_data.status[cpu_id]));

See stage define would really help here especially if you name it not just
STAGE_1/2/x but more meaningful like STAGE_1_HW_INIT etc.

> +
> +	debug("CPU %u: cross-cpu flag: %x [after timeout]\n", cpu_id,
> +	      arc_read_uncached_32(&cross_cpu_data.data_flag));
> +	debug("CPU %u: status: %d [after timeout]\n", cpu_id,
> +	      arc_read_uncached_32(&cross_cpu_data.status[cpu_id]));
> +}
> +
> +static void do_init_slave_cpus(void)
> +{
> +	clear_cross_cpu_data();
> +	sync_cross_cpu_data();
> +
> +	debug("cross_cpu_data location: %#x\n", (u32)&cross_cpu_data);
> +
> +	for (u32 i = MASTER_CPU_ID + 1; i < NR_CPUS; i++)
> +		if (is_cpu_used(i))
> +			do_init_slave_cpu(i);
> +}
> +
> +static void do_init_master_cpu(void)
> +{
> +	/*
> +	 * Setup master caches even if master isn't used as we want to use
> +	 * same cache configuration on all running CPUs
> +	 */
> +	init_master_icache();
> +	init_master_dcache();
> +}
> +
> +enum hsdk_axi_masters {
> +	M_HS_CORE = 0,
> +	M_HS_RTT,
> +	M_AXI_TUN,
> +	M_HDMI_VIDEO,
> +	M_HDMI_AUDIO,
> +	M_USB_HOST,
> +	M_ETHERNET,
> +	M_SDIO,
> +	M_GPU,
> +	M_DMAC_0,
> +	M_DMAC_1,
> +	M_DVFS
> +};
> +
> +#define UPDATE_VAL	1
> +
> +/*
> + * m	master		AXI_M_m_SLV0	AXI_M_m_SLV1	AXI_M_m_OFFSET0	AXI_M_m_OFFSET1
> + * 0	HS (CBU)	0x11111111	0x63111111	0xFEDCBA98	0x0E543210
> + * 1	HS (RTT)	0x77777777	0x77777777	0xFEDCBA98	0x76543210
> + * 2	AXI Tunnel	0x88888888	0x88888888	0xFEDCBA98	0x76543210
> + * 3	HDMI-VIDEO	0x77777777	0x77777777	0xFEDCBA98	0x76543210
> + * 4	HDMI-ADUIO	0x77777777	0x77777777	0xFEDCBA98	0x76543210
> + * 5	USB-HOST	0x77777777	0x77999999	0xFEDCBA98	0x76DCBA98
> + * 6	ETHERNET	0x77777777	0x77999999	0xFEDCBA98	0x76DCBA98
> + * 7	SDIO		0x77777777	0x77999999	0xFEDCBA98	0x76DCBA98
> + * 8	GPU		0x77777777	0x77777777	0xFEDCBA98	0x76543210
> + * 9	DMAC (port #1)	0x77777777	0x77777777	0xFEDCBA98	0x76543210
> + * 10	DMAC (port #2)	0x77777777	0x77777777	0xFEDCBA98	0x76543210
> + * 11	DVFS 		0x00000000	0x60000000	0x00000000	0x00000000
> + */

Add a reference to docs where this data is taken from so others may self-educate
themselves, double-check values as well as reflect all changes later done in the
docs if any ever happen.

> +static void setup_clocks(void)
> +{
> +	ulong rate;
> +
> +	/* Setup CPU clock */
> +	if (env_common.cpu_freq.set) {
> +		rate = env_common.cpu_freq.val;
> +		soc_clk_ctl("cpu-clk", &rate, CLK_ON | CLK_SET | CLK_MHZ);
> +	}
> +
> +	/* Setup TUN clock */
> +	if (env_common.tun_freq.set) {
> +		rate = env_common.tun_freq.val;
> +		if (rate)
> +			soc_clk_ctl("tun-clk", &rate, CLK_ON | CLK_SET | CLK_MHZ);
> +		else
> +			soc_clk_ctl("tun-clk", NULL, CLK_OFF);
> +	}
> +
> +	if (env_common.axi_freq.set) {
> +		rate = env_common.axi_freq.val;
> +		soc_clk_ctl("axi-clk", &rate, CLK_SET | CLK_ON | CLK_MHZ);
> +	}
> +}
> +
> +static void do_init_claster(void)

claster -> cluster

> +{
> +	/*
> +	 * A multi-core ARC HS configuration always includes only one
> +	 * ARC_AUX_NON_VOLATILE_LIMIT register, which is shared by all the
> +	 * cores.
> +	 */
> +	init_claster_nvlim();

ditto

> +}
> +
> +static int check_master_cpu_id(void)
> +{
> +	if (CPU_ID_GET() == MASTER_CPU_ID)
> +		return 0;
> +
> +	pr_err("u-boot runs on non-master cpu with id: %lu\n", CPU_ID_GET());
> +
> +	return -ENOENT;
> +}
> +
> +noinline static int prepare_cpus(void)
> +{
> +	int ret;
> +
> +	ret = check_master_cpu_id();
> +	if (ret)
> +		return ret;
> +
> +	ret = envs_process_and_validate(env_map_common, env_map_core, is_cpu_used);
> +	if (ret)
> +		return ret;
> +
> +	printf("CPU start mask is %#x\n", env_common.core_mask.val);
> +
> +	__builtin_arc_nop();
> +	__builtin_arc_nop();

Why NOPs are here?

> +
> +	do_init_slave_cpus();
> +
> +	do_init_master_cpu();
> +
> +	do_init_claster();
> +
> +	return 0;
> +}
> +
> +static int hsdk_go_run(u32 cpu_start_reg)
> +{
> +	/* Cleanup caches, disable interrupts */
> +	cleanup_before_go();
> +
> +	if (env_common.halt_on_boot)
> +		halt_this_cpu();
> +
> +	__builtin_arc_nop();
> +	__builtin_arc_nop();
> +	__builtin_arc_nop();

Remove NOPs.

> +static int do_hsdk_clock_print_all(cmd_tbl_t *cmdtp, int flag, int argc,
> +				   char *const argv[])
> +{
> +	/* CPU clock domain */
> +	soc_clk_ctl("cpu-pll", NULL, CLK_PRINT | CLK_MHZ);
> +	soc_clk_ctl("cpu-clk", NULL, CLK_PRINT | CLK_MHZ);
> +	printf("\n");
> +
> +	/* SYS clock domain */
> +	soc_clk_ctl("sys-pll", NULL, CLK_PRINT | CLK_MHZ);
> +	soc_clk_ctl("apb-clk", NULL, CLK_PRINT | CLK_MHZ);
> +	soc_clk_ctl("axi-clk", NULL, CLK_PRINT | CLK_MHZ);
> +	soc_clk_ctl("eth-clk", NULL, CLK_PRINT | CLK_MHZ);
> +	soc_clk_ctl("usb-clk", NULL, CLK_PRINT | CLK_MHZ);
> +	soc_clk_ctl("sdio-clk", NULL, CLK_PRINT | CLK_MHZ);
> +/*	soc_clk_ctl("hdmi-sys-clk", NULL, CLK_PRINT | CLK_MHZ); */

Add comment on why these exist but commented out.
I.e. we do want to keep them and here's why...

> diff --git a/board/synopsys/hsdk/hsdk.c b/board/synopsys/hsdk/hsdk.c
> deleted file mode 100644
> index 5b3a063b69..0000000000
> --- a/board/synopsys/hsdk/hsdk.c
> +++ /dev/null
> @@ -1,95 +0,0 @@
> -/*
> - * Copyright (C) 2017 Synopsys, Inc. All rights reserved.
> - *
> - * SPDX-License-Identifier:	GPL-2.0+
> - */
> -
> -#include <common.h>
> -#include <dwmmc.h>
> -#include <malloc.h>
> -
> -DECLARE_GLOBAL_DATA_PTR;
> -
> -#define	CREG_BASE	(ARC_PERIPHERAL_BASE + 0x1000)
> -#define	CREG_PAE	(CREG_BASE + 0x180)
> -#define	CREG_PAE_UPDATE	(CREG_BASE + 0x194)
> -#define	CREG_CPU_START	(CREG_BASE + 0x400)
> -
> -int board_early_init_f(void)
> -{
> -	/* In current chip PAE support for DMA is broken, disabling it. */
> -	writel(0, (void __iomem *) CREG_PAE);
> -
> -	/* Really apply settings made above */
> -	writel(1, (void __iomem *) CREG_PAE_UPDATE);
> -
> -	return 0;
> -}
> -
> -#define SDIO_BASE              (ARC_PERIPHERAL_BASE + 0xA000)
> -#define SDIO_UHS_REG_EXT       (SDIO_BASE + 0x108)
> -#define SDIO_UHS_REG_EXT_DIV_2 (2 << 30)
> -
> -int board_mmc_init(bd_t *bis)
> -{
> -	struct dwmci_host *host = NULL;
> -
> -	host = malloc(sizeof(struct dwmci_host));
> -	if (!host) {
> -		printf("dwmci_host malloc fail!\n");
> -		return 1;
> -	}
> -
> -	/*
> -	 * Switch SDIO external ciu clock divider from default div-by-8 to
> -	 * minimum possible div-by-2.
> -	 */
> -	writel(SDIO_UHS_REG_EXT_DIV_2, (void __iomem *) SDIO_UHS_REG_EXT);
> -
> -	memset(host, 0, sizeof(struct dwmci_host));
> -	host->name = "Synopsys Mobile storage";
> -	host->ioaddr = (void *)ARC_DWMMC_BASE;
> -	host->buswidth = 4;
> -	host->dev_index = 0;
> -	host->bus_hz = 50000000;
> -
> -	add_dwmci(host, host->bus_hz / 2, 400000);
> -
> -	return 0;
> -}
> -
> -void board_jump_and_run(ulong entry, int zero, int arch, uint params)
> -{
> -	void (*kernel_entry)(int zero, int arch, uint params);
> -
> -	kernel_entry = (void (*)(int, int, uint))entry;
> -
> -	smp_set_core_boot_addr(entry, -1);
> -	smp_kick_all_cpus();
> -	kernel_entry(zero, arch, params);
> -}
> -
> -#define RESET_VECTOR_ADDR	0x0
> -
> -void smp_set_core_boot_addr(unsigned long addr, int corenr)
> -{
> -	/* All cores have reset vector pointing to 0 */
> -	writel(addr, (void __iomem *)RESET_VECTOR_ADDR);
> -
> -	/* Make sure other cores see written value in memory */
> -	flush_dcache_all();
> -}
> -
> -void smp_kick_all_cpus(void)
> -{
> -#define BITS_START_CORE1	1
> -#define BITS_START_CORE2	2
> -#define BITS_START_CORE3	3
> -
> -	int cmd = readl((void __iomem *)CREG_CPU_START);
> -
> -	cmd |= (1 << BITS_START_CORE1) |
> -	       (1 << BITS_START_CORE2) |
> -	       (1 << BITS_START_CORE3);
> -	writel(cmd, (void __iomem *)CREG_CPU_START);
> -}

Wrote about that part in the very beginning.
Something wrong with that hsdk.c file in the patch and I'd prefer to keep it anyways.

> diff --git a/configs/hsdk_defconfig b/configs/hsdk_defconfig
> index 11cb7e03a6..a70bf9d8a1 100644
> --- a/configs/hsdk_defconfig
> +++ b/configs/hsdk_defconfig
> @@ -6,14 +6,21 @@ CONFIG_SYS_CLK_FREQ=1000000000
>  CONFIG_DEFAULT_DEVICE_TREE="hsdk"
>  CONFIG_USE_BOOTARGS=y
>  CONFIG_BOOTARGS="console=ttyS0,115200n8"
> +CONFIG_DISPLAY_BOARDINFO=y

Do we use it only for printing "model" value from .dtb?

> diff --git a/include/configs/hsdk.h b/include/configs/hsdk.h
> index fb4829ab46..a280ce0057 100644
> --- a/include/configs/hsdk.h
> +++ b/include/configs/hsdk.h

[snip]

> @@ -60,6 +61,32 @@
>   * Environment settings
>   */
>  #define CONFIG_ENV_SIZE			SZ_16K
> +/* Last 64K block is for env */
> +#define CONFIG_ENV_OFFSET		(SZ_64K * 16)
> +/* 64K block */
> +#define CONFIG_ENV_SECT_SIZE		SZ_64K

We store ENV on SD-card so how above changes are relevant to us?
I guess that's a reminder of our attempt to put ENV in SPI flash.
Pls get rid of that.

> +
> +#define CONFIG_EXTRA_ENV_SETTINGS \
> +	"core_dccm_0=0x10\0" \
> +	"core_dccm_1=0x6\0" \
> +	"core_dccm_2=0x10\0" \
> +	"core_dccm_3=0x6\0" \
> +	"core_iccm_0=0x10\0" \
> +	"core_iccm_1=0x6\0" \
> +	"core_iccm_2=0x10\0" \
> +	"core_iccm_3=0x6\0" \
> +	"core_mask=0xF\0" \
> +	"dcache_ena=0x1\0" \
> +	"icache_ena=0x1\0" \
> +	"non_volatile_limit=0xE\0" \
> +	"hsdk_hs34=setenv core_mask 0x2; setenv icache_ena 0x0; setenv dcache_ena 0x0; setenv core_iccm_1 0x7; setenv core_dccm_1 0x8; setenv
> non_volatile_limit 0x0;\0" \
> +	"hsdk_hs36=setenv core_mask 0x1; setenv icache_ena 0x1; setenv dcache_ena 0x1; setenv core_iccm_0 0x10; setenv core_dccm_0 0x10; setenv
> non_volatile_limit 0xE;\0" \
> +	"hsdk_hs36_ccm=setenv core_mask 0x2; setenv icache_ena 0x1; setenv dcache_ena 0x1; setenv core_iccm_1 0x7; setenv core_dccm_1 0x8; setenv
> non_volatile_limit 0xE;\0" \
> +	"hsdk_hs38=setenv core_mask 0x1; setenv icache_ena 0x1; setenv dcache_ena 0x1; setenv core_iccm_0 0x10; setenv core_dccm_0 0x10; setenv
> non_volatile_limit 0xE;\0" \
> +	"hsdk_hs38_ccm=setenv core_mask 0x2; setenv icache_ena 0x1; setenv dcache_ena 0x1; setenv core_iccm_1 0x7; setenv core_dccm_1 0x8; setenv
> non_volatile_limit 0xE;\0" \
> +	"hsdk_hs38x2=setenv core_mask 0x3; setenv icache_ena 0x1; setenv dcache_ena 0x1; setenv core_iccm_0 0x10; setenv core_dccm_0 0x10; setenv
> non_volatile_limit 0xE; setenv core_iccm_1 0x6; setenv core_dccm_1 0x6;\0" \
> +	"hsdk_hs38x3=setenv core_mask 0x7; setenv icache_ena 0x1; setenv dcache_ena 0x1; setenv core_iccm_0 0x10; setenv core_dccm_0 0x10; setenv
> non_volatile_limit 0xE; setenv core_iccm_1 0x6; setenv core_dccm_1 0x6; setenv core_iccm_2 0x10; setenv core_dccm_2 0x10;\0" \
> +	"hsdk_hs38x4=setenv core_mask 0xF; setenv icache_ena 0x1; setenv dcache_ena 0x1; setenv core_iccm_0 0x10; setenv core_dccm_0 0x10; setenv
> non_volatile_limit 0xE; setenv core_iccm_1 0x6; setenv core_dccm_1 0x6; setenv core_iccm_2 0x10; setenv core_dccm_2 0x10; setenv core_iccm_3 0x6;
> setenv core_dccm_3 0x6;\0"

It worth wrapping those long lines above so we fit in 80 symbols as usual.

-Alexey