u-boot/drivers/ddr/marvell/a38x/old/ddr3_training_hw_algo.c
Marek Behún cae6e8993c ddr: marvell: a38x: Import old DDR training code from 2017 version of U-Boot
Import DDR training code from commit 1b69ce2fc0 ("arm: mvebu:
ddr3_debug: remove self assignments") into
drivers/ddr/marvell/a38x/old/. The code is not used yet.

Explanation:

Since 2019, on some Turris Omnia boards we have been having problems
with newer versions of Marvell's DDR3 training code for Armada 38x,
which is ported from mv-ddr-marvell [1] to U-Boot into the
drivers/ddr/marvell/a38x/ directory:
- sometimes the DDR3 training fails on some older boards, sometime it
  fails on some newer boards
- other times it succeeds, but some boards experience crashes of the
  operating system after running for some time.

Using the stock version of Turris Omnia's U-Boot from solved these
issues, but this solution was not satisfactory, since we wanted
features from new U-Boot.

Back in 2020-2022 we have spent several months trying to debug the
issues, working with Marvell, on our own, and also with U-Boot
community, but these issues persist still.

One solution we used back in 2019 was a "hybrid U-Boot": the SPL part
(containing the DDR3 training code) was taken from the stock version,
while the proper part was current U-Boot at the time. This solution also
has its drawbacks, of which the main one is the need to glue binaries
from two separate builds.

Since then there have been some more changes to the DDR3 training code
in upstream mv-ddr-marvell that have been ported to U-Boot. We have
provided our users experimental builds of U-Boot in the TurrisOS so that
they could try upgrading the firmware and let us know if those problems
still exist. And they do.

We do not have the time nor manpower to debug this problem and fix it
properly. Marvell was also no able to provide a solution to this,
probably because they do not have the manpower as well.

I have therefore come up with this "not that pretty" solution: take the
DDR3 training code from an older version of U-Boot that is known to
work, put it into current U-Boot under old/ subdirectory within
drivers/ddr/marvell/a38x/, build into the SPL binary both the old and
new versions and make it possible to select the old version via an env
variable.

[1] https://github.com/MarvellEmbeddedProcessors/mv-ddr-marvell

Signed-off-by: Marek Behún <kabel@kernel.org>
2024-07-08 08:20:58 +02:00

685 lines
18 KiB
C

/*
* Copyright (C) Marvell International Ltd. and its affiliates
*
* SPDX-License-Identifier: GPL-2.0
*/
#include <spl.h>
#include <asm/io.h>
#include <asm/arch/cpu.h>
#include <asm/arch/soc.h>
#include "ddr3_init.h"
#define VREF_INITIAL_STEP 3
#define VREF_SECOND_STEP 1
#define VREF_MAX_INDEX 7
#define MAX_VALUE (1024 - 1)
#define MIN_VALUE (-MAX_VALUE)
#define GET_RD_SAMPLE_DELAY(data, cs) ((data >> rd_sample_mask[cs]) & 0xf)
u32 ck_delay = (u32)-1, ck_delay_16 = (u32)-1;
u32 ca_delay;
int ddr3_tip_centr_skip_min_win_check = 0;
u8 current_vref[MAX_BUS_NUM][MAX_INTERFACE_NUM];
u8 last_vref[MAX_BUS_NUM][MAX_INTERFACE_NUM];
u16 current_valid_window[MAX_BUS_NUM][MAX_INTERFACE_NUM];
u16 last_valid_window[MAX_BUS_NUM][MAX_INTERFACE_NUM];
u8 lim_vref[MAX_BUS_NUM][MAX_INTERFACE_NUM];
u8 interface_state[MAX_INTERFACE_NUM];
u8 vref_window_size[MAX_INTERFACE_NUM][MAX_BUS_NUM];
u8 vref_window_size_th = 12;
static u8 pup_st[MAX_BUS_NUM][MAX_INTERFACE_NUM];
static u32 rd_sample_mask[] = {
0,
8,
16,
24
};
#define VREF_STEP_1 0
#define VREF_STEP_2 1
#define VREF_CONVERGE 2
/*
* ODT additional timing
*/
int ddr3_tip_write_additional_odt_setting(u32 dev_num, u32 if_id)
{
u32 cs_num = 0, max_read_sample = 0, min_read_sample = 0;
u32 data_read[MAX_INTERFACE_NUM] = { 0 };
u32 read_sample[MAX_CS_NUM];
u32 val;
u32 pup_index;
int max_phase = MIN_VALUE, current_phase;
enum hws_access_type access_type = ACCESS_TYPE_UNICAST;
struct hws_topology_map *tm = ddr3_get_topology_map();
CHECK_STATUS(ddr3_tip_if_write(dev_num, access_type, if_id,
DUNIT_ODT_CONTROL_REG,
0 << 8, 0x3 << 8));
CHECK_STATUS(ddr3_tip_if_read(dev_num, access_type, if_id,
READ_DATA_SAMPLE_DELAY,
data_read, MASK_ALL_BITS));
val = data_read[if_id];
for (cs_num = 0; cs_num < MAX_CS_NUM; cs_num++) {
read_sample[cs_num] = GET_RD_SAMPLE_DELAY(val, cs_num);
/* find maximum of read_samples */
if (read_sample[cs_num] >= max_read_sample) {
if (read_sample[cs_num] == max_read_sample)
max_phase = MIN_VALUE;
else
max_read_sample = read_sample[cs_num];
for (pup_index = 0;
pup_index < tm->num_of_bus_per_interface;
pup_index++) {
CHECK_STATUS(ddr3_tip_bus_read
(dev_num, if_id,
ACCESS_TYPE_UNICAST, pup_index,
DDR_PHY_DATA,
RL_PHY_REG + CS_REG_VALUE(cs_num),
&val));
current_phase = ((int)val & 0xe0) >> 6;
if (current_phase >= max_phase)
max_phase = current_phase;
}
}
/* find minimum */
if (read_sample[cs_num] < min_read_sample)
min_read_sample = read_sample[cs_num];
}
min_read_sample = min_read_sample - 1;
max_read_sample = max_read_sample + 4 + (max_phase + 1) / 2 + 1;
if (min_read_sample >= 0xf)
min_read_sample = 0xf;
if (max_read_sample >= 0x1f)
max_read_sample = 0x1f;
CHECK_STATUS(ddr3_tip_if_write(dev_num, access_type, if_id,
ODT_TIMING_LOW,
((min_read_sample - 1) << 12),
0xf << 12));
CHECK_STATUS(ddr3_tip_if_write(dev_num, access_type, if_id,
ODT_TIMING_LOW,
(max_read_sample << 16),
0x1f << 16));
return MV_OK;
}
int get_valid_win_rx(u32 dev_num, u32 if_id, u8 res[4])
{
u32 reg_pup = RESULT_DB_PHY_REG_ADDR;
u32 reg_data;
u32 cs_num;
int i;
cs_num = 0;
/* TBD */
reg_pup += cs_num;
for (i = 0; i < 4; i++) {
CHECK_STATUS(ddr3_tip_bus_read(dev_num, if_id,
ACCESS_TYPE_UNICAST, i,
DDR_PHY_DATA, reg_pup,
&reg_data));
res[i] = (reg_data >> RESULT_DB_PHY_REG_RX_OFFSET) & 0x1f;
}
return 0;
}
/*
* This algorithm deals with the vertical optimum from Voltage point of view
* of the sample signal.
* Voltage sample point can improve the Eye / window size of the bit and the
* pup.
* The problem is that it is tune for all DQ the same so there isn't any
* PBS like code.
* It is more like centralization.
* But because we don't have The training SM support we do it a bit more
* smart search to save time.
*/
int ddr3_tip_vref(u32 dev_num)
{
/*
* The Vref register have non linear order. Need to check what will be
* in future projects.
*/
u32 vref_map[8] = {
1, 2, 3, 4, 5, 6, 7, 0
};
/* State and parameter definitions */
u32 initial_step = VREF_INITIAL_STEP;
/* need to be assign with minus ????? */
u32 second_step = VREF_SECOND_STEP;
u32 algo_run_flag = 0, currrent_vref = 0;
u32 while_count = 0;
u32 pup = 0, if_id = 0, num_pup = 0, rep = 0;
u32 val = 0;
u32 reg_addr = 0xa8;
u32 copy_start_pattern, copy_end_pattern;
enum hws_result *flow_result = ddr3_tip_get_result_ptr(training_stage);
u8 res[4];
struct hws_topology_map *tm = ddr3_get_topology_map();
CHECK_STATUS(ddr3_tip_special_rx(dev_num));
/* save start/end pattern */
copy_start_pattern = start_pattern;
copy_end_pattern = end_pattern;
/* set vref as centralization pattern */
start_pattern = PATTERN_VREF;
end_pattern = PATTERN_VREF;
/* init params */
for (if_id = 0; if_id < MAX_INTERFACE_NUM; if_id++) {
VALIDATE_ACTIVE(tm->if_act_mask, if_id);
for (pup = 0;
pup < tm->num_of_bus_per_interface; pup++) {
current_vref[pup][if_id] = 0;
last_vref[pup][if_id] = 0;
lim_vref[pup][if_id] = 0;
current_valid_window[pup][if_id] = 0;
last_valid_window[pup][if_id] = 0;
if (vref_window_size[if_id][pup] >
vref_window_size_th) {
pup_st[pup][if_id] = VREF_CONVERGE;
DEBUG_TRAINING_HW_ALG(
DEBUG_LEVEL_INFO,
("VREF config, IF[ %d ]pup[ %d ] - Vref tune not requered (%d)\n",
if_id, pup, __LINE__));
} else {
pup_st[pup][if_id] = VREF_STEP_1;
CHECK_STATUS(ddr3_tip_bus_read
(dev_num, if_id,
ACCESS_TYPE_UNICAST, pup,
DDR_PHY_DATA, reg_addr, &val));
CHECK_STATUS(ddr3_tip_bus_write
(dev_num, ACCESS_TYPE_UNICAST,
if_id, ACCESS_TYPE_UNICAST,
pup, DDR_PHY_DATA, reg_addr,
(val & (~0xf)) | vref_map[0]));
DEBUG_TRAINING_HW_ALG(
DEBUG_LEVEL_INFO,
("VREF config, IF[ %d ]pup[ %d ] - Vref = %X (%d)\n",
if_id, pup,
(val & (~0xf)) | vref_map[0],
__LINE__));
}
}
interface_state[if_id] = 0;
}
/* TODO: Set number of active interfaces */
num_pup = tm->num_of_bus_per_interface * MAX_INTERFACE_NUM;
while ((algo_run_flag <= num_pup) & (while_count < 10)) {
while_count++;
for (rep = 1; rep < 4; rep++) {
ddr3_tip_centr_skip_min_win_check = 1;
ddr3_tip_centralization_rx(dev_num);
ddr3_tip_centr_skip_min_win_check = 0;
/* Read Valid window results only for non converge pups */
for (if_id = 0; if_id < MAX_INTERFACE_NUM; if_id++) {
VALIDATE_ACTIVE(tm->if_act_mask, if_id);
if (interface_state[if_id] != 4) {
get_valid_win_rx(dev_num, if_id, res);
for (pup = 0;
pup < tm->num_of_bus_per_interface;
pup++) {
VALIDATE_ACTIVE
(tm->bus_act_mask, pup);
if (pup_st[pup]
[if_id] ==
VREF_CONVERGE)
continue;
current_valid_window[pup]
[if_id] =
(current_valid_window[pup]
[if_id] * (rep - 1) +
1000 * res[pup]) / rep;
}
}
}
}
for (if_id = 0; if_id < MAX_INTERFACE_NUM; if_id++) {
VALIDATE_ACTIVE(tm->if_act_mask, if_id);
DEBUG_TRAINING_HW_ALG(
DEBUG_LEVEL_TRACE,
("current_valid_window: IF[ %d ] - ", if_id));
for (pup = 0;
pup < tm->num_of_bus_per_interface; pup++) {
VALIDATE_ACTIVE(tm->bus_act_mask, pup);
DEBUG_TRAINING_HW_ALG(DEBUG_LEVEL_TRACE,
("%d ",
current_valid_window
[pup][if_id]));
}
DEBUG_TRAINING_HW_ALG(DEBUG_LEVEL_TRACE, ("\n"));
}
/* Compare results and respond as function of state */
for (if_id = 0; if_id < MAX_INTERFACE_NUM; if_id++) {
VALIDATE_ACTIVE(tm->if_act_mask, if_id);
for (pup = 0;
pup < tm->num_of_bus_per_interface; pup++) {
VALIDATE_ACTIVE(tm->bus_act_mask, pup);
DEBUG_TRAINING_HW_ALG(DEBUG_LEVEL_TRACE,
("I/F[ %d ], pup[ %d ] STATE #%d (%d)\n",
if_id, pup,
pup_st[pup]
[if_id], __LINE__));
if (pup_st[pup][if_id] == VREF_CONVERGE)
continue;
DEBUG_TRAINING_HW_ALG(DEBUG_LEVEL_TRACE,
("I/F[ %d ], pup[ %d ] CHECK progress - Current %d Last %d, limit VREF %d (%d)\n",
if_id, pup,
current_valid_window[pup]
[if_id],
last_valid_window[pup]
[if_id], lim_vref[pup]
[if_id], __LINE__));
/*
* The -1 is for solution resolution +/- 1 tap
* of ADLL
*/
if (current_valid_window[pup][if_id] + 200 >=
(last_valid_window[pup][if_id])) {
if (pup_st[pup][if_id] == VREF_STEP_1) {
/*
* We stay in the same state and
* step just update the window
* size (take the max) and Vref
*/
if (current_vref[pup]
[if_id] == VREF_MAX_INDEX) {
/*
* If we step to the end
* and didn't converge
* to some particular
* better Vref value
* define the pup as
* converge and step
* back to nominal
* Vref.
*/
pup_st[pup]
[if_id] =
VREF_CONVERGE;
algo_run_flag++;
interface_state
[if_id]++;
DEBUG_TRAINING_HW_ALG
(DEBUG_LEVEL_TRACE,
("I/F[ %d ], pup[ %d ] VREF_CONVERGE - Vref = %X (%d)\n",
if_id, pup,
current_vref[pup]
[if_id],
__LINE__));
} else {
/* continue to update the Vref index */
current_vref[pup]
[if_id] =
((current_vref[pup]
[if_id] +
initial_step) >
VREF_MAX_INDEX) ?
VREF_MAX_INDEX
: (current_vref[pup]
[if_id] +
initial_step);
if (current_vref[pup]
[if_id] ==
VREF_MAX_INDEX) {
pup_st[pup]
[if_id]
=
VREF_STEP_2;
}
lim_vref[pup]
[if_id] =
last_vref[pup]
[if_id] =
current_vref[pup]
[if_id];
}
last_valid_window[pup]
[if_id] =
GET_MAX(current_valid_window
[pup][if_id],
last_valid_window
[pup]
[if_id]);
/* update the Vref for next stage */
currrent_vref =
current_vref[pup]
[if_id];
CHECK_STATUS
(ddr3_tip_bus_read
(dev_num, if_id,
ACCESS_TYPE_UNICAST, pup,
DDR_PHY_DATA, reg_addr,
&val));
CHECK_STATUS
(ddr3_tip_bus_write
(dev_num,
ACCESS_TYPE_UNICAST,
if_id,
ACCESS_TYPE_UNICAST, pup,
DDR_PHY_DATA, reg_addr,
(val & (~0xf)) |
vref_map[currrent_vref]));
DEBUG_TRAINING_HW_ALG
(DEBUG_LEVEL_TRACE,
("VREF config, IF[ %d ]pup[ %d ] - Vref = %X (%d)\n",
if_id, pup,
(val & (~0xf)) |
vref_map[currrent_vref],
__LINE__));
} else if (pup_st[pup][if_id]
== VREF_STEP_2) {
/*
* We keep on search back with
* the same step size.
*/
last_valid_window[pup]
[if_id] =
GET_MAX(current_valid_window
[pup][if_id],
last_valid_window
[pup]
[if_id]);
last_vref[pup][if_id] =
current_vref[pup]
[if_id];
/* we finish all search space */
if ((current_vref[pup]
[if_id] - second_step) == lim_vref[pup][if_id]) {
/*
* If we step to the end
* and didn't converge
* to some particular
* better Vref value
* define the pup as
* converge and step
* back to nominal
* Vref.
*/
pup_st[pup]
[if_id] =
VREF_CONVERGE;
algo_run_flag++;
interface_state
[if_id]++;
current_vref[pup]
[if_id] =
(current_vref[pup]
[if_id] -
second_step);
DEBUG_TRAINING_HW_ALG
(DEBUG_LEVEL_TRACE,
("I/F[ %d ], pup[ %d ] VREF_CONVERGE - Vref = %X (%d)\n",
if_id, pup,
current_vref[pup]
[if_id],
__LINE__));
} else
/* we finish all search space */
if (current_vref[pup]
[if_id] ==
lim_vref[pup]
[if_id]) {
/*
* If we step to the end
* and didn't converge
* to some particular
* better Vref value
* define the pup as
* converge and step
* back to nominal
* Vref.
*/
pup_st[pup]
[if_id] =
VREF_CONVERGE;
algo_run_flag++;
interface_state
[if_id]++;
DEBUG_TRAINING_HW_ALG
(DEBUG_LEVEL_TRACE,
("I/F[ %d ], pup[ %d ] VREF_CONVERGE - Vref = %X (%d)\n",
if_id, pup,
current_vref[pup]
[if_id],
__LINE__));
} else {
current_vref[pup]
[if_id] =
current_vref[pup]
[if_id] -
second_step;
}
/* Update the Vref for next stage */
currrent_vref =
current_vref[pup]
[if_id];
CHECK_STATUS
(ddr3_tip_bus_read
(dev_num, if_id,
ACCESS_TYPE_UNICAST, pup,
DDR_PHY_DATA, reg_addr,
&val));
CHECK_STATUS
(ddr3_tip_bus_write
(dev_num,
ACCESS_TYPE_UNICAST,
if_id,
ACCESS_TYPE_UNICAST, pup,
DDR_PHY_DATA, reg_addr,
(val & (~0xf)) |
vref_map[currrent_vref]));
DEBUG_TRAINING_HW_ALG
(DEBUG_LEVEL_TRACE,
("VREF config, IF[ %d ]pup[ %d ] - Vref = %X (%d)\n",
if_id, pup,
(val & (~0xf)) |
vref_map[currrent_vref],
__LINE__));
}
} else {
/* we change state and change step */
if (pup_st[pup][if_id] == VREF_STEP_1) {
pup_st[pup][if_id] =
VREF_STEP_2;
lim_vref[pup][if_id] =
current_vref[pup]
[if_id] - initial_step;
last_valid_window[pup]
[if_id] =
current_valid_window[pup]
[if_id];
last_vref[pup][if_id] =
current_vref[pup]
[if_id];
current_vref[pup][if_id] =
last_vref[pup][if_id] -
second_step;
/* Update the Vref for next stage */
CHECK_STATUS
(ddr3_tip_bus_read
(dev_num, if_id,
ACCESS_TYPE_UNICAST, pup,
DDR_PHY_DATA, reg_addr,
&val));
CHECK_STATUS
(ddr3_tip_bus_write
(dev_num,
ACCESS_TYPE_UNICAST,
if_id,
ACCESS_TYPE_UNICAST, pup,
DDR_PHY_DATA, reg_addr,
(val & (~0xf)) |
vref_map[current_vref[pup]
[if_id]]));
DEBUG_TRAINING_HW_ALG
(DEBUG_LEVEL_TRACE,
("VREF config, IF[ %d ]pup[ %d ] - Vref = %X (%d)\n",
if_id, pup,
(val & (~0xf)) |
vref_map[current_vref[pup]
[if_id]],
__LINE__));
} else if (pup_st[pup][if_id] == VREF_STEP_2) {
/*
* The last search was the max
* point set value and exit
*/
CHECK_STATUS
(ddr3_tip_bus_read
(dev_num, if_id,
ACCESS_TYPE_UNICAST, pup,
DDR_PHY_DATA, reg_addr,
&val));
CHECK_STATUS
(ddr3_tip_bus_write
(dev_num,
ACCESS_TYPE_UNICAST,
if_id,
ACCESS_TYPE_UNICAST, pup,
DDR_PHY_DATA, reg_addr,
(val & (~0xf)) |
vref_map[last_vref[pup]
[if_id]]));
DEBUG_TRAINING_HW_ALG
(DEBUG_LEVEL_TRACE,
("VREF config, IF[ %d ]pup[ %d ] - Vref = %X (%d)\n",
if_id, pup,
(val & (~0xf)) |
vref_map[last_vref[pup]
[if_id]],
__LINE__));
pup_st[pup][if_id] =
VREF_CONVERGE;
algo_run_flag++;
interface_state[if_id]++;
DEBUG_TRAINING_HW_ALG
(DEBUG_LEVEL_TRACE,
("I/F[ %d ], pup[ %d ] VREF_CONVERGE - Vref = %X (%d)\n",
if_id, pup,
current_vref[pup]
[if_id], __LINE__));
}
}
}
}
}
for (if_id = 0; if_id < MAX_INTERFACE_NUM; if_id++) {
VALIDATE_ACTIVE(tm->if_act_mask, if_id);
for (pup = 0;
pup < tm->num_of_bus_per_interface; pup++) {
VALIDATE_ACTIVE(tm->bus_act_mask, pup);
CHECK_STATUS(ddr3_tip_bus_read
(dev_num, if_id,
ACCESS_TYPE_UNICAST, pup,
DDR_PHY_DATA, reg_addr, &val));
DEBUG_TRAINING_HW_ALG(
DEBUG_LEVEL_INFO,
("FINAL values: I/F[ %d ], pup[ %d ] - Vref = %X (%d)\n",
if_id, pup, val, __LINE__));
}
}
flow_result[if_id] = TEST_SUCCESS;
/* restore start/end pattern */
start_pattern = copy_start_pattern;
end_pattern = copy_end_pattern;
return 0;
}
/*
* CK/CA Delay
*/
int ddr3_tip_cmd_addr_init_delay(u32 dev_num, u32 adll_tap)
{
u32 if_id = 0;
u32 ck_num_adll_tap = 0, ca_num_adll_tap = 0, data = 0;
struct hws_topology_map *tm = ddr3_get_topology_map();
/*
* ck_delay_table is delaying the of the clock signal only.
* (to overcome timing issues between_c_k & command/address signals)
*/
/*
* ca_delay is delaying the of the entire command & Address signals
* (include Clock signal to overcome DGL error on the Clock versus
* the DQS).
*/
/* Calc ADLL Tap */
if ((ck_delay == -1) || (ck_delay_16 == -1)) {
DEBUG_TRAINING_HW_ALG(
DEBUG_LEVEL_ERROR,
("ERROR: One of ck_delay values not initialized!!!\n"));
}
for (if_id = 0; if_id <= MAX_INTERFACE_NUM - 1; if_id++) {
VALIDATE_ACTIVE(tm->if_act_mask, if_id);
/* Calc delay ps in ADLL tap */
if (tm->interface_params[if_id].bus_width ==
BUS_WIDTH_16)
ck_num_adll_tap = ck_delay_16 / adll_tap;
else
ck_num_adll_tap = ck_delay / adll_tap;
ca_num_adll_tap = ca_delay / adll_tap;
data = (ck_num_adll_tap & 0x3f) +
((ca_num_adll_tap & 0x3f) << 10);
/*
* Set the ADLL number to the CK ADLL for Interfaces for
* all Pup
*/
DEBUG_TRAINING_HW_ALG(
DEBUG_LEVEL_TRACE,
("ck_num_adll_tap %d ca_num_adll_tap %d adll_tap %d\n",
ck_num_adll_tap, ca_num_adll_tap, adll_tap));
CHECK_STATUS(ddr3_tip_bus_write(dev_num, ACCESS_TYPE_UNICAST,
if_id, ACCESS_TYPE_MULTICAST,
PARAM_NOT_CARE, DDR_PHY_CONTROL,
0x0, data));
}
return MV_OK;
}