aboutsummaryrefslogtreecommitdiff
path: root/resources/libreboot/patch/kgpe-d16/0011-northbridge-amd-amdmct-Fix-broken-AMD-K10-DDR3-memor.patch
diff options
context:
space:
mode:
Diffstat (limited to 'resources/libreboot/patch/kgpe-d16/0011-northbridge-amd-amdmct-Fix-broken-AMD-K10-DDR3-memor.patch')
-rw-r--r-- resources/libreboot/patch/kgpe-d16/0011-northbridge-amd-amdmct-Fix-broken-AMD-K10-DDR3-memor.patch 3451
1 files changed, 3451 insertions, 0 deletions
diff --git a/resources/libreboot/patch/kgpe-d16/0011-northbridge-amd-amdmct-Fix-broken-AMD-K10-DDR3-memor.patch b/resources/libreboot/patch/kgpe-d16/0011-northbridge-amd-amdmct-Fix-broken-AMD-K10-DDR3-memor.patch
new file mode 100644
index 00000000..ec822df4
--- /dev/null
+++ b/resources/libreboot/patch/kgpe-d16/0011-northbridge-amd-amdmct-Fix-broken-AMD-K10-DDR3-memor.patch
@@ -0,0 +1,3451 @@
+From 791a6ea672f16f971422f10514bb0c4225930489 Mon Sep 17 00:00:00 2001
+From: Timothy Pearson <kb9vqf@pearsoncomputing.net>
+Date: Sat, 5 Sep 2015 17:55:58 -0500
+Subject: [PATCH 011/146] northbridge/amd/amdmct: Fix broken AMD K10 DDR3
+ memory initialization
+
+---
+ src/northbridge/amd/amdmct/mct/mct_d.c | 1 -
+ src/northbridge/amd/amdmct/mct_ddr3/mct_d.c | 177 ++++-
+ src/northbridge/amd/amdmct/mct_ddr3/mct_d.h | 8 +-
+ src/northbridge/amd/amdmct/mct_ddr3/mct_d_gcc.h | 87 +--
+ src/northbridge/amd/amdmct/mct_ddr3/mctardk6.c | 6 +-
+ src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c | 806 ++++++++++++-----------
+ src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c | 6 +-
+ src/northbridge/amd/amdmct/mct_ddr3/mctmtr_d.c | 14 +-
+ src/northbridge/amd/amdmct/mct_ddr3/mctndi_d.c | 3 +-
+ src/northbridge/amd/amdmct/mct_ddr3/mctproc.c | 19 +-
+ src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c | 5 +-
+ src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c | 803 +++++++++++-----------
+ src/northbridge/amd/amdmct/mct_ddr3/mctsrc1p.c | 18 +-
+ src/northbridge/amd/amdmct/mct_ddr3/mctsrc2p.c | 13 +-
+ src/northbridge/amd/amdmct/mct_ddr3/mcttmrl.c | 7 +-
+ src/northbridge/amd/amdmct/mct_ddr3/mctwl.c | 42 +-
+ src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c | 267 ++++----
+ src/northbridge/amd/amdmct/wrappers/mcti_d.c | 114 +---
+ 18 files changed, 1254 insertions(+), 1142 deletions(-)
+
+diff --git a/src/northbridge/amd/amdmct/mct/mct_d.c b/src/northbridge/amd/amdmct/mct/mct_d.c
+index 3dec934..88910e2 100644
+--- a/src/northbridge/amd/amdmct/mct/mct_d.c
++++ b/src/northbridge/amd/amdmct/mct/mct_d.c
+@@ -542,7 +542,6 @@ static void HTMemMapInit_D(struct MCTStatStruc *pMCTstat,
+ pDCTstat = pDCTstatA + Node;
+ devx = pDCTstat->dev_map;
+ DramSelBaseAddr = 0;
+- pDCTstat = pDCTstatA + Node;
+ if (!pDCTstat->GangedMode) {
+ DramSelBaseAddr = pDCTstat->NodeSysLimit - pDCTstat->DCTSysLimit;
+ /*In unganged mode, we must add DCT0 and DCT1 to DCTSysLimit */
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c
+index 71a6be8..fa59d71 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c
+@@ -214,6 +214,8 @@ static const u8 Table_DQSRcvEn_Offset[] = {0x00,0x01,0x10,0x11,0x2};
+ static const u8 Tab_L1CLKDis[] = {0x20, 0x20, 0x10, 0x10, 0x08, 0x08, 0x04, 0x04};
+ static const u8 Tab_AM3CLKDis[] = {0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00};
+ static const u8 Tab_S1CLKDis[] = {0xA2, 0xA2, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
++static const u8 Tab_C32CLKDis[] = {0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00}; /* Enable CS0 - CS3 clocks (DIMM0 - DIMM1) */
++static const u8 Tab_G34CLKDis[] = {0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00}; /* Enable CS0 - CS3 clocks (DIMM0 - DIMM1) */
+ static const u8 Tab_ManualCLKDis[]= {0x10, 0x04, 0x08, 0x20, 0x00, 0x00, 0x00, 0x00};
+
+ static const u8 Table_Comp_Rise_Slew_20x[] = {7, 3, 2, 2, 0xFF};
+@@ -277,6 +279,11 @@ restartinit:
+ for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
+ struct DCTStatStruc *pDCTstat;
+ pDCTstat = pDCTstatA + Node;
++
++ /* Zero out data structures to avoid false detection of DIMMs */
++ memset(pDCTstat, 0, sizeof(struct DCTStatStruc));
++
++ /* Initialize data structures */
+ pDCTstat->Node_ID = Node;
+ pDCTstat->dev_host = PA_HOST(Node);
+ pDCTstat->dev_map = PA_MAP(Node);
+@@ -284,17 +291,22 @@ restartinit:
+ pDCTstat->dev_nbmisc = PA_NBMISC(Node);
+ pDCTstat->NodeSysBase = node_sys_base;
+
++ printk(BIOS_DEBUG, "mctAutoInitMCT_D: mct_init Node %d\n", Node);
+ mct_init(pMCTstat, pDCTstat);
+ mctNodeIDDebugPort_D();
+ pDCTstat->NodePresent = NodePresent_D(Node);
+ if (pDCTstat->NodePresent) { /* See if Node is there*/
++ printk(BIOS_DEBUG, "mctAutoInitMCT_D: clear_legacy_Mode\n");
+ clear_legacy_Mode(pMCTstat, pDCTstat);
+ pDCTstat->LogicalCPUID = mctGetLogicalCPUID_D(Node);
+
++ printk(BIOS_DEBUG, "mctAutoInitMCT_D: mct_InitialMCT_D\n");
+ mct_InitialMCT_D(pMCTstat, pDCTstat);
+
++ printk(BIOS_DEBUG, "mctAutoInitMCT_D: mctSMBhub_Init\n");
+ mctSMBhub_Init(Node); /* Switch SMBUS crossbar to proper node*/
+
++ printk(BIOS_DEBUG, "mctAutoInitMCT_D: mct_initDCT\n");
+ mct_initDCT(pMCTstat, pDCTstat);
+ if (pDCTstat->ErrCode == SC_FatalErr) {
+ goto fatalexit; /* any fatal errors?*/
+@@ -345,6 +357,7 @@ restartinit:
+
+ mct_FinalMCT_D(pMCTstat, pDCTstatA);
+ printk(BIOS_DEBUG, "mctAutoInitMCT_D Done: Global Status: %x\n", pMCTstat->GStatus);
++
+ return;
+
+ fatalexit:
+@@ -560,7 +573,6 @@ static void HTMemMapInit_D(struct MCTStatStruc *pMCTstat,
+ pDCTstat = pDCTstatA + Node;
+ devx = pDCTstat->dev_map;
+ DramSelBaseAddr = 0;
+- pDCTstat = pDCTstatA + Node; /* ??? */
+ if (!pDCTstat->GangedMode) {
+ DramSelBaseAddr = pDCTstat->NodeSysLimit - pDCTstat->DCTSysLimit;
+ /*In unganged mode, we must add DCT0 and DCT1 to DCTSysLimit */
+@@ -645,6 +657,7 @@ static void HTMemMapInit_D(struct MCTStatStruc *pMCTstat,
+ devx = pDCTstat->dev_map;
+
+ if (pDCTstat->NodePresent) {
++ printk(BIOS_DEBUG, " Copy dram map from Node 0 to Node %02x \n", Node);
+ reg = 0x40; /*Dram Base 0*/
+ do {
+ val = Get_NB32(dev, reg);
+@@ -1162,7 +1175,7 @@ static void SPD2ndTiming(struct MCTStatStruc *pMCTstat,
+
+ /* Program DRAM Timing values */
+ DramTimingLo = 0; /* Dram Timing Low init */
+- val = pDCTstat->CASL - 2; /* pDCTstat.CASL to reg. definition */
++ val = pDCTstat->CASL - 4; /* pDCTstat.CASL to reg. definition */
+ DramTimingLo |= val;
+
+ val = pDCTstat->Trcd - Bias_TrcdT;
+@@ -1406,18 +1419,16 @@ static void SPDGetTCL_D(struct MCTStatStruc *pMCTstat,
+ else if (tCKproposed16x <= 24) {
+ pDCTstat->TargetFreq = 6;
+ tCKproposed16x = 24;
+- }
+- else if (tCKproposed16x <= 30) {
++ } else if (tCKproposed16x <= 30) {
+ pDCTstat->TargetFreq = 5;
+ tCKproposed16x = 30;
+- }
+- else {
++ } else {
+ pDCTstat->TargetFreq = 4;
+ tCKproposed16x = 40;
+ }
+ /* Running through this loop twice:
+ - First time find tCL at target frequency
+- - Second tim find tCL at 400MHz */
++ - Second time find tCL at 400MHz */
+
+ for (;;) {
+ CLT_Fail = 0;
+@@ -1451,7 +1462,7 @@ static void SPDGetTCL_D(struct MCTStatStruc *pMCTstat,
+ CLT_Fail = 1;
+ /* get CL and T */
+ if (!CLT_Fail) {
+- bytex = CLactual - 2;
++ bytex = CLactual;
+ if (tCKproposed16x == 20)
+ byte = 7;
+ else if (tCKproposed16x == 24)
+@@ -1632,7 +1643,7 @@ static u8 AutoConfig_D(struct MCTStatStruc *pMCTstat,
+ val = 0x0f; /* recommended setting (default) */
+ DramConfigHi |= val << 24;
+
+- if (pDCTstat->LogicalCPUID & (AMD_DR_Cx | AMD_DR_Bx))
++ if (pDCTstat->LogicalCPUID & (AMD_DR_Dx | AMD_DR_Cx | AMD_DR_Bx))
+ DramConfigHi |= 1 << DcqArbBypassEn;
+
+ /* Build MemClkDis Value from Dram Timing Lo and
+@@ -1657,6 +1668,10 @@ static u8 AutoConfig_D(struct MCTStatStruc *pMCTstat,
+ p = Tab_L1CLKDis;
+ else if (byte == PT_M2 || byte == PT_AS)
+ p = Tab_AM3CLKDis;
++ else if (byte == PT_C3)
++ p = Tab_C32CLKDis;
++ else if (byte == PT_GR)
++ p = Tab_G34CLKDis;
+ else
+ p = Tab_S1CLKDis;
+
+@@ -2102,8 +2117,7 @@ static u8 DIMMPresence_D(struct MCTStatStruc *pMCTstat,
+ if (byte == JED_RDIMM || byte == JED_MiniRDIMM) {
+ RegDIMMPresent |= 1 << i;
+ pDCTstat->DimmRegistered[i] = 1;
+- }
+- else {
++ } else {
+ pDCTstat->DimmRegistered[i] = 0;
+ }
+ /* Check ECC capable */
+@@ -2977,9 +2991,9 @@ static void mct_FinalMCT_D(struct MCTStatStruc *pMCTstat,
+ } else { /* For Dx CPU */
+ val = 0x0CE00F00 | 1 << 29/* FlushWrOnStpGnt */;
+ if (!(pDCTstat->GangedMode))
+- val |= 0x20; /* MctWrLimit = 8 for Unganed mode */
++ val |= 0x20; /* MctWrLimit = 8 for Unganged mode */
+ else
+- val |= 0x40; /* MctWrLimit = 16 for ganed mode */
++ val |= 0x40; /* MctWrLimit = 16 for ganged mode */
+ Set_NB32(pDCTstat->dev_dct, 0x11C, val);
+
+ val = Get_NB32(pDCTstat->dev_dct, 0x1B0);
+@@ -3414,6 +3428,138 @@ static void mct_BeforeDramInit_Prod_D(struct MCTStatStruc *pMCTstat,
+ Set_NB32(dev, 0x98 + reg_off, 0x0D000030);
+ Set_NB32(dev, 0x9C + reg_off, dword);
+ Set_NB32(dev, 0x98 + reg_off, 0x4D040F30);
++
++ /* FIXME
++ * Mainboards need to be able to specify the maximum number of DIMMs installable per channel
++ * For now assume a maximum of 2 DIMMs per channel can be installed
++ */
++ uint8_t MaxDimmsInstallable = 2;
++
++ /* Obtain number of DIMMs on channel */
++ uint8_t dimm_count = pDCTstat->MAdimms[i];
++ uint8_t rank_count_dimm0;
++ uint8_t rank_count_dimm1;
++ uint32_t odt_pattern_0;
++ uint32_t odt_pattern_1;
++ uint32_t odt_pattern_2;
++ uint32_t odt_pattern_3;
++
++ /* Select appropriate ODT pattern for installed DIMMs
++ * Refer to the BKDG Rev. 3.62, page 120 onwards
++ */
++ if (pDCTstat->C_DCTPtr[i]->Status[DCT_STATUS_REGISTERED]) {
++ if (MaxDimmsInstallable == 2) {
++ if (dimm_count == 1) {
++ /* 1 DIMM detected */
++ rank_count_dimm1 = pDCTstat->C_DCTPtr[i]->DimmRanks[1];
++ if (rank_count_dimm1 == 1) {
++ odt_pattern_0 = 0x00000000;
++ odt_pattern_1 = 0x00000000;
++ odt_pattern_2 = 0x00000000;
++ odt_pattern_3 = 0x00020000;
++ } else if (rank_count_dimm1 == 2) {
++ odt_pattern_0 = 0x00000000;
++ odt_pattern_1 = 0x00000000;
++ odt_pattern_2 = 0x00000000;
++ odt_pattern_3 = 0x02080000;
++ } else if (rank_count_dimm1 == 4) {
++ odt_pattern_0 = 0x00000000;
++ odt_pattern_1 = 0x00000000;
++ odt_pattern_2 = 0x020a0000;
++ odt_pattern_3 = 0x080a0000;
++ } else {
++ /* Fallback */
++ odt_pattern_0 = 0x00000000;
++ odt_pattern_1 = 0x00000000;
++ odt_pattern_2 = 0x00000000;
++ odt_pattern_3 = 0x00000000;
++ }
++ } else {
++ /* 2 DIMMs detected */
++ rank_count_dimm0 = pDCTstat->C_DCTPtr[i]->DimmRanks[0];
++ rank_count_dimm1 = pDCTstat->C_DCTPtr[i]->DimmRanks[1];
++ if ((rank_count_dimm0 < 4) && (rank_count_dimm1 < 4)) {
++ odt_pattern_0 = 0x00000000;
++ odt_pattern_1 = 0x01010202;
++ odt_pattern_2 = 0x00000000;
++ odt_pattern_3 = 0x09030603;
++ } else if ((rank_count_dimm0 < 4) && (rank_count_dimm1 == 4)) {
++ odt_pattern_0 = 0x01010000;
++ odt_pattern_1 = 0x01010a0a;
++ odt_pattern_2 = 0x01090000;
++ odt_pattern_3 = 0x01030e0b;
++ } else if ((rank_count_dimm0 == 4) && (rank_count_dimm1 < 4)) {
++ odt_pattern_0 = 0x00000202;
++ odt_pattern_1 = 0x05050202;
++ odt_pattern_2 = 0x00000206;
++ odt_pattern_3 = 0x0d070203;
++ } else if ((rank_count_dimm0 == 4) && (rank_count_dimm1 == 4)) {
++ odt_pattern_0 = 0x05050a0a;
++ odt_pattern_1 = 0x05050a0a;
++ odt_pattern_2 = 0x050d0a0e;
++ odt_pattern_3 = 0x05070a0b;
++ } else {
++ /* Fallback */
++ odt_pattern_0 = 0x00000000;
++ odt_pattern_1 = 0x00000000;
++ odt_pattern_2 = 0x00000000;
++ odt_pattern_3 = 0x00000000;
++ }
++ }
++ } else {
++ /* FIXME
++ * 3 DIMMs per channel UNIMPLEMENTED
++ */
++ odt_pattern_0 = 0x00000000;
++ odt_pattern_1 = 0x00000000;
++ odt_pattern_2 = 0x00000000;
++ odt_pattern_3 = 0x00000000;
++ }
++ } else {
++ if (MaxDimmsInstallable == 2) {
++ if (dimm_count == 1) {
++ /* 1 DIMM detected */
++ rank_count_dimm1 = pDCTstat->C_DCTPtr[i]->DimmRanks[1];
++ if (rank_count_dimm1 == 1) {
++ odt_pattern_0 = 0x00000000;
++ odt_pattern_1 = 0x00000000;
++ odt_pattern_2 = 0x00000000;
++ odt_pattern_3 = 0x00020000;
++ } else if (rank_count_dimm1 == 2) {
++ odt_pattern_0 = 0x00000000;
++ odt_pattern_1 = 0x00000000;
++ odt_pattern_2 = 0x00000000;
++ odt_pattern_3 = 0x02080000;
++ } else {
++ /* Fallback */
++ odt_pattern_0 = 0x00000000;
++ odt_pattern_1 = 0x00000000;
++ odt_pattern_2 = 0x00000000;
++ odt_pattern_3 = 0x00000000;
++ }
++ } else {
++ /* 2 DIMMs detected */
++ odt_pattern_0 = 0x00000000;
++ odt_pattern_1 = 0x01010202;
++ odt_pattern_2 = 0x00000000;
++ odt_pattern_3 = 0x09030603;
++ }
++ } else {
++ /* FIXME
++ * 3 DIMMs per channel UNIMPLEMENTED
++ */
++ odt_pattern_0 = 0x00000000;
++ odt_pattern_1 = 0x00000000;
++ odt_pattern_2 = 0x00000000;
++ odt_pattern_3 = 0x00000000;
++ }
++ }
++
++ /* Program ODT pattern */
++ Set_NB32_index_wait(dev, 0xf0 + reg_off, 0x180, odt_pattern_1);
++ Set_NB32_index_wait(dev, 0xf0 + reg_off, 0x181, odt_pattern_0);
++ Set_NB32_index_wait(dev, 0xf0 + reg_off, 0x182, odt_pattern_3);
++ Set_NB32_index_wait(dev, 0xf0 + reg_off, 0x183, odt_pattern_2);
+ }
+ }
+ }
+@@ -3657,6 +3803,7 @@ static void mct_BeforeDQSTrain_D(struct MCTStatStruc *pMCTstat,
+ }
+ }
+
++/* Erratum 350 */
+ static void mct_ResetDLL_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 dct)
+ {
+@@ -3692,11 +3839,11 @@ static void mct_ResetDLL_D(struct MCTStatStruc *pMCTstat,
+ mct_Read1LTestPattern_D(pMCTstat, pDCTstat, addr); /* cache fills */
+
+ /* Write 0000_8000h to register F2x[1,0]9C_xD080F0C */
+- Set_NB32_index_wait(dev, 0x98 + reg_off, 0x4D080F0C, 0x00008000);
++ Set_NB32_index_wait(dev, 0x98 + reg_off, 0xD080F0C, 0x00008000);
+ mct_Wait(80); /* wait >= 300ns */
+
+ /* Write 0000_0000h to register F2x[1,0]9C_xD080F0C */
+- Set_NB32_index_wait(dev, 0x98 + reg_off, 0x4D080F0C, 0x00000000);
++ Set_NB32_index_wait(dev, 0x98 + reg_off, 0xD080F0C, 0x00000000);
+ mct_Wait(800); /* wait >= 2us */
+ break;
+ }
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h
+index e2d7aa8..219aa42 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h
+@@ -499,7 +499,7 @@ struct DCTStatStruc { /* A per Node structure*/
+ /* CHB DIMM0 Byte 0 - 7 TxDqs */
+ /* CHB DIMM1 Byte 0 - 7 TxDqs */
+ /* CHB DIMM1 Byte 0 - 7 TxDqs */
+- u8 CH_D_B_RCVRDLY[2][4][8]; /* [A/B] [DIMM0-3] [DQS] */
++ u16 CH_D_B_RCVRDLY[2][4][8]; /* [A/B] [DIMM0-3] [DQS] */
+ /* CHA DIMM 0 Receiver Enable Delay*/
+ /* CHA DIMM 1 Receiver Enable Delay*/
+ /* CHA DIMM 2 Receiver Enable Delay*/
+@@ -509,7 +509,7 @@ struct DCTStatStruc { /* A per Node structure*/
+ /* CHB DIMM 1 Receiver Enable Delay*/
+ /* CHB DIMM 2 Receiver Enable Delay*/
+ /* CHB DIMM 3 Receiver Enable Delay*/
+- u8 CH_D_BC_RCVRDLY[2][4];
++ u16 CH_D_BC_RCVRDLY[2][4];
+ /* CHA DIMM 0 - 4 Check Byte Receiver Enable Delay*/
+ /* CHB DIMM 0 - 4 Check Byte Receiver Enable Delay*/
+ u8 DIMMValidDCT[2]; /* DIMM# in DCT0*/
+@@ -769,7 +769,7 @@ u8 mct_checkNumberOfDqsRcvEn_1Pass(u8 pass);
+ u32 SetupDqsPattern_1PassA(u8 Pass);
+ u32 SetupDqsPattern_1PassB(u8 Pass);
+ u8 mct_Get_Start_RcvrEnDly_1Pass(u8 Pass);
+-u8 mct_Average_RcvrEnDly_Pass(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly, u8 RcvrEnDlyLimit, u8 Channel, u8 Receiver, u8 Pass);
++u16 mct_Average_RcvrEnDly_Pass(struct DCTStatStruc *pDCTstat, u16 RcvrEnDly, u16 RcvrEnDlyLimit, u8 Channel, u8 Receiver, u8 Pass);
+ void CPUMemTyping_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA);
+ void UMAMemTyping_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA);
+ u32 mctGetLogicalCPUID(u32 Node);
+@@ -779,7 +779,7 @@ void mct_TrainDQSPos_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTs
+ void mctSetEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA);
+ void TrainMaxReadLatency_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA);
+ void mct_EndDQSTraining_D(struct MCTStatStruc *pMCTstat,struct DCTStatStruc *pDCTstatA);
+-void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly, u8 FinalValue, u8 Channel, u8 Receiver, u32 dev, u32 index_reg, u8 Addl_Index, u8 Pass);
++void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u16 RcvrEnDly, u8 FinalValue, u8 Channel, u8 Receiver, u32 dev, u32 index_reg, u8 Addl_Index, u8 Pass);
+ void SetEccDQSRcvrEn_D(struct DCTStatStruc *pDCTstat, u8 Channel);
+ void mctGet_PS_Cfg_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u32 dct);
+ void InterleaveBanks_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct);
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mct_d_gcc.h b/src/northbridge/amd/amdmct/mct_ddr3/mct_d_gcc.h
+index 60f98bc..c40ea1a 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mct_d_gcc.h
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mct_d_gcc.h
+@@ -2,6 +2,7 @@
+ * This file is part of the coreboot project.
+ *
+ * Copyright (C) 2010 Advanced Micro Devices, Inc.
++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+@@ -103,10 +104,10 @@ static void proc_CLFLUSH(u32 addr_hi)
+
+ __asm__ volatile (
+ /* clflush fs:[eax] */
+- "outb %%al, $0xed\n\t" /* _EXECFENCE */
+- "clflush %%fs:(%0)\n\t"
++ "outb %%al, $0xed\n\t" /* _EXECFENCE */
++ "clflush %%fs:(%0)\n\t"
+ "mfence\n\t"
+- ::"a" (addr_hi<<8)
++ ::"a" (addr_hi<<8)
+ );
+ }
+
+@@ -141,6 +142,24 @@ static u32 read32_fs(u32 addr_lo)
+ return value;
+ }
+
++static uint64_t read64_fs(uint32_t addr_lo)
++{
++ uint64_t value = 0;
++ uint32_t value_lo;
++ uint32_t value_hi;
++
++ __asm__ volatile (
++ "outb %%al, $0xed\n\t" /* _EXECFENCE */
++ "mfence\n\t"
++ "movl %%fs:(%2), %0\n\t"
++ "movl %%fs:(%3), %1\n\t"
++ :"=c"(value_lo), "=d"(value_hi): "a" (addr_lo), "b" (addr_lo + 4) : "memory"
++ );
++ value |= value_lo;
++ value |= ((uint64_t)value_hi) << 32;
++ return value;
++}
++
+ #ifdef UNUSED_CODE
+ static u8 read8_fs(u32 addr_lo)
+ {
+@@ -210,68 +229,6 @@ static __attribute__((noinline)) void FlushDQSTestPattern_L18(u32 addr_lo)
+ );
+ }
+
+-static void ReadL18TestPattern(u32 addr_lo)
+-{
+- /* set fs and use fs prefix to access the mem */
+- __asm__ volatile (
+- "outb %%al, $0xed\n\t" /* _EXECFENCE */
+- "movl %%fs:-128(%%esi), %%eax\n\t" /* TestAddr cache line */
+- "movl %%fs:-64(%%esi), %%eax\n\t" /* +1 */
+- "movl %%fs:(%%esi), %%eax\n\t" /* +2 */
+- "movl %%fs:64(%%esi), %%eax\n\t" /* +3 */
+-
+- "movl %%fs:-128(%%edi), %%eax\n\t" /* +4 */
+- "movl %%fs:-64(%%edi), %%eax\n\t" /* +5 */
+- "movl %%fs:(%%edi), %%eax\n\t" /* +6 */
+- "movl %%fs:64(%%edi), %%eax\n\t" /* +7 */
+-
+- "movl %%fs:-128(%%ebx), %%eax\n\t" /* +8 */
+- "movl %%fs:-64(%%ebx), %%eax\n\t" /* +9 */
+- "movl %%fs:(%%ebx), %%eax\n\t" /* +10 */
+- "movl %%fs:64(%%ebx), %%eax\n\t" /* +11 */
+-
+- "movl %%fs:-128(%%ecx), %%eax\n\t" /* +12 */
+- "movl %%fs:-64(%%ecx), %%eax\n\t" /* +13 */
+- "movl %%fs:(%%ecx), %%eax\n\t" /* +14 */
+- "movl %%fs:64(%%ecx), %%eax\n\t" /* +15 */
+-
+- "movl %%fs:-128(%%edx), %%eax\n\t" /* +16 */
+- "movl %%fs:-64(%%edx), %%eax\n\t" /* +17 */
+- "mfence\n\t"
+-
+- :: "a"(0), "b" (addr_lo+128+8*64), "c" (addr_lo+128+12*64),
+- "d" (addr_lo +128+16*64), "S"(addr_lo+128),
+- "D"(addr_lo+128+4*64)
+- );
+-
+-}
+-
+-static void ReadL9TestPattern(u32 addr_lo)
+-{
+-
+- /* set fs and use fs prefix to access the mem */
+- __asm__ volatile (
+- "outb %%al, $0xed\n\t" /* _EXECFENCE */
+-
+- "movl %%fs:-128(%%ecx), %%eax\n\t" /* TestAddr cache line */
+- "movl %%fs:-64(%%ecx), %%eax\n\t" /* +1 */
+- "movl %%fs:(%%ecx), %%eax\n\t" /* +2 */
+- "movl %%fs:64(%%ecx), %%eax\n\t" /* +3 */
+-
+- "movl %%fs:-128(%%edx), %%eax\n\t" /* +4 */
+- "movl %%fs:-64(%%edx), %%eax\n\t" /* +5 */
+- "movl %%fs:(%%edx), %%eax\n\t" /* +6 */
+- "movl %%fs:64(%%edx), %%eax\n\t" /* +7 */
+-
+- "movl %%fs:-128(%%ebx), %%eax\n\t" /* +8 */
+- "mfence\n\t"
+-
+- :: "a"(0), "b" (addr_lo+128+8*64), "c"(addr_lo+128),
+- "d"(addr_lo+128+4*64)
+- );
+-
+-}
+-
+ static void ReadMaxRdLat1CLTestPattern_D(u32 addr)
+ {
+ SetUpperFSbase(addr);
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctardk6.c b/src/northbridge/amd/amdmct/mct_ddr3/mctardk6.c
+index ae1654c..99a2628 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mctardk6.c
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctardk6.c
+@@ -2,6 +2,7 @@
+ * This file is part of the coreboot project.
+ *
+ * Copyright (C) 2010 Advanced Micro Devices, Inc.
++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+@@ -17,7 +18,7 @@
+ * Foundation, Inc.
+ */
+
+-/* The socket type F (1207), Fr2, G (1207) are not tested.
++/* The socket types Fr2 and G (1207) are not tested.
+ */
+
+ static void Get_ChannelPS_Cfg0_D(u8 MAAdimms, u8 Speed, u8 MAAload,
+@@ -79,8 +80,7 @@ static void Get_ChannelPS_Cfg0_D( u8 MAAdimms, u8 Speed, u8 MAAload,
+ else
+ *AddrTmgCTL = 0x00353935;
+ }
+- }
+- else {
++ } else {
+ if(Speed == 4) {
+ *AddrTmgCTL = 0x00000000;
+ if (MAAdimms == 3)
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c
+index 404727b..8572243 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c
+@@ -2,6 +2,7 @@
+ * This file is part of the coreboot project.
+ *
+ * Copyright (C) 2010 Advanced Micro Devices, Inc.
++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+@@ -22,13 +23,6 @@ static void CalcEccDQSPos_D(struct MCTStatStruc *pMCTstat,
+ u8 scale, u8 ChipSel);
+ static void GetDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 ChipSel);
+-static u8 MiddleDQS_D(u8 min, u8 max);
+-static void TrainReadDQS_D(struct MCTStatStruc *pMCTstat,
+- struct DCTStatStruc *pDCTstat,
+- u8 cs_start);
+-static void TrainWriteDQS_D(struct MCTStatStruc *pMCTstat,
+- struct DCTStatStruc *pDCTstat,
+- u8 cs_start);
+ static void WriteDQSTestPattern_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat,
+ u32 TestAddr_lo);
+@@ -43,31 +37,19 @@ static void FlushDQSTestPattern_D(struct DCTStatStruc *pDCTstat,
+ u32 addr_lo);
+ static void SetTargetWTIO_D(u32 TestAddr);
+ static void ResetTargetWTIO_D(void);
+-static void ReadDQSTestPattern_D(struct MCTStatStruc *pMCTstat,
+- struct DCTStatStruc *pDCTstat,
+- u32 TestAddr_lo);
+-static void mctEngDQSwindow_Save_D(struct MCTStatStruc *pMCTstat,
+- struct DCTStatStruc *pDCTstat, u8 ChipSel,
+- u8 RnkDlyFilterMin, u8 RnkDlyFilterMax);
+ void ResetDCTWrPtr_D(u32 dev, u32 index_reg, u32 index);
+ u8 mct_DisableDimmEccEn_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat);
+ static void mct_SetDQSDelayCSR_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat,
+ u8 ChipSel);
+-static void mct_SetDQSDelayAllCSR_D(struct MCTStatStruc *pMCTstat,
+- struct DCTStatStruc *pDCTstat,
+- u8 cs_start);
+ u32 mct_GetMCTSysAddr_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 Channel,
+ u8 receiver, u8 *valid);
+ static void SetupDqsPattern_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat,
+ u32 *buffer);
+-
+-static void StoreWrRdDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat,
+- struct DCTStatStruc *pDCTstat, u8 ChipSel,
+- u8 RnkDlyFilterMin, u8 RnkDlyFilterMax);
++static void proc_IOCLFLUSH_D(u32 addr_hi);
+
+ static void StoreDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 ChipSel);
+
+@@ -286,20 +268,99 @@ static void CalcEccDQSPos_D(struct MCTStatStruc *pMCTstat,
+ pDCTstat->DQSDelay = (u8)DQSDelay;
+ }
+
++static void write_dqs_write_data_timing_registers(uint16_t* delay, uint32_t dev, uint8_t dimm, uint32_t index_reg)
++{
++ uint32_t dword;
++
++ /* Lanes 0 - 3 */
++ dword = Get_NB32_index_wait(dev, index_reg, 0x1 | (dimm << 8));
++ dword &= ~0x7f7f7f7f;
++ dword |= (delay[3] & 0x7f) << 24;
++ dword |= (delay[2] & 0x7f) << 16;
++ dword |= (delay[1] & 0x7f) << 8;
++ dword |= delay[0] & 0x7f;
++ Set_NB32_index_wait(dev, index_reg, 0x1 | (dimm << 8), dword);
++
++ /* Lanes 4 - 7 */
++ dword = Get_NB32_index_wait(dev, index_reg, 0x2 | (dimm << 8));
++ dword &= ~0x7f7f7f7f;
++ dword |= (delay[7] & 0x7f) << 24;
++ dword |= (delay[6] & 0x7f) << 16;
++ dword |= (delay[5] & 0x7f) << 8;
++ dword |= delay[4] & 0x7f;
++ Set_NB32_index_wait(dev, index_reg, 0x2 | (dimm << 8), dword);
++
++ /* Lane 8 (ECC) */
++ dword = Get_NB32_index_wait(dev, index_reg, 0x3 | (dimm << 8));
++ dword &= ~0x0000007f;
++ dword |= delay[8] & 0x7f;
++ Set_NB32_index_wait(dev, index_reg, 0x3 | (dimm << 8), dword);
++}
++
++static void write_dqs_read_data_timing_registers(uint16_t* delay, uint32_t dev, uint8_t dimm, uint32_t index_reg)
++{
++ uint32_t dword;
++
++ /* Lanes 0 - 3 */
++ dword = Get_NB32_index_wait(dev, index_reg, 0x5 | (dimm << 8));
++ dword &= ~0x3f3f3f3f;
++ dword |= (delay[3] & 0x3f) << 24;
++ dword |= (delay[2] & 0x3f) << 16;
++ dword |= (delay[1] & 0x3f) << 8;
++ dword |= delay[0] & 0x3f;
++ Set_NB32_index_wait(dev, index_reg, 0x5 | (dimm << 8), dword);
++
++ /* Lanes 4 - 7 */
++ dword = Get_NB32_index_wait(dev, index_reg, 0x6 | (dimm << 8));
++ dword &= ~0x3f3f3f3f;
++ dword |= (delay[7] & 0x3f) << 24;
++ dword |= (delay[6] & 0x3f) << 16;
++ dword |= (delay[5] & 0x3f) << 8;
++ dword |= delay[4] & 0x3f;
++ Set_NB32_index_wait(dev, index_reg, 0x6 | (dimm << 8), dword);
++
++ /* Lane 8 (ECC) */
++ dword = Get_NB32_index_wait(dev, index_reg, 0x7 | (dimm << 8));
++ dword &= ~0x0000003f;
++ dword |= delay[8] & 0x3f;
++ Set_NB32_index_wait(dev, index_reg, 0x7 | (dimm << 8), dword);
++}
++
++/* DQS Position Training
++ * Algorithm detailed in the Fam10h BKDG Rev. 3.62 section 2.8.9.9.3
++ */
+ static void TrainDQSRdWrPos_D(struct MCTStatStruc *pMCTstat,
+- struct DCTStatStruc *pDCTstat,
+- u8 cs_start)
++ struct DCTStatStruc *pDCTstat)
+ {
+ u32 Errors;
+- u8 Channel, DQSWrDelay;
++ u8 Channel;
++ u8 Receiver;
+ u8 _DisableDramECC = 0;
+- u32 PatternBuffer[292];
++ u32 PatternBuffer[304]; /* 288 + 16 */
+ u8 _Wrap32Dis = 0, _SSE2 = 0;
+- u8 dqsWrDelay_end;
+
++ u32 dev;
+ u32 addr;
++ u8 valid;
+ u32 cr4;
+ u32 lo, hi;
++ u32 index_reg;
++ uint32_t TestAddr;
++
++ uint8_t dual_rank;
++ uint8_t iter;
++ uint8_t lane;
++ uint16_t bytelane_test_results;
++ uint16_t current_write_dqs_delay[MAX_BYTE_LANES];
++ uint16_t current_read_dqs_delay[MAX_BYTE_LANES];
++ uint16_t write_dqs_delay_stepping_done[MAX_BYTE_LANES];
++ uint8_t dqs_read_results_array[2][MAX_BYTE_LANES][64]; /* [rank][lane][step] */
++ uint8_t dqs_write_results_array[2][MAX_BYTE_LANES][128]; /* [rank][lane][step] */
++
++ uint8_t last_pos = 0;
++ uint8_t cur_count = 0;
++ uint8_t best_pos = 0;
++ uint8_t best_count = 0;
+
+ print_debug_dqs("\nTrainDQSRdWrPos: Node_ID ", pDCTstat->Node_ID, 0);
+ cr4 = read_cr4();
+@@ -323,50 +384,363 @@ static void TrainDQSRdWrPos_D(struct MCTStatStruc *pMCTstat,
+ SetupDqsPattern_D(pMCTstat, pDCTstat, PatternBuffer);
+
+ /* mct_BeforeTrainDQSRdWrPos_D */
+- dqsWrDelay_end = 0x20;
++
++ dev = pDCTstat->dev_dct;
++ pDCTstat->Direction = DQS_READDIR;
++
++ /* 2.8.9.9.3 (2)
++ * Loop over each channel, lane, and rank
++ */
++
++ /* NOTE
++ * The BKDG originally stated to iterate over lane, then rank; however, this process is quite slow
++ * compared to an equivalent loop over rank, then lane, as the latter allows multiple lanes to be
++ * tested simultaneously, thus improving performance by around 8x.
++ */
+
+ Errors = 0;
+ for (Channel = 0; Channel < 2; Channel++) {
+- print_debug_dqs("\tTrainDQSRdWrPos: 1 Channel ",Channel, 1);
++ print_debug_dqs("\tTrainDQSRdWrPos: 1 Channel ", Channel, 1);
+ pDCTstat->Channel = Channel;
+
+ if (pDCTstat->DIMMValidDCT[Channel] == 0) /* mct_BeforeTrainDQSRdWrPos_D */
+ continue;
+- pDCTstat->DqsRdWrPos_Saved = 0;
+- for ( DQSWrDelay = 0; DQSWrDelay < dqsWrDelay_end; DQSWrDelay++) {
+- pDCTstat->DQSDelay = DQSWrDelay;
+- pDCTstat->Direction = DQS_WRITEDIR;
+- mct_SetDQSDelayAllCSR_D(pMCTstat, pDCTstat, cs_start);
+-
+- print_debug_dqs("\t\tTrainDQSRdWrPos: 21 DQSWrDelay ", DQSWrDelay, 2);
+- TrainReadDQS_D(pMCTstat, pDCTstat, cs_start);
+- print_debug_dqs("\t\tTrainDQSRdWrPos: 21 DqsRdWrPos_Saved ", pDCTstat->DqsRdWrPos_Saved, 2);
+- if (pDCTstat->DqsRdWrPos_Saved == 0xFF)
+- break;
+-
+- print_debug_dqs("\t\tTrainDQSRdWrPos: 22 TrainErrors ",pDCTstat->TrainErrors, 2);
+- if (pDCTstat->TrainErrors == 0) {
++
++ index_reg = 0x98 + 0x100 * Channel;
++
++ dual_rank = 0;
++ Receiver = mct_InitReceiver_D(pDCTstat, Channel);
++ /* There are four receiver pairs, loosely associated with chipselects.
++ * This is essentially looping over each rank of each DIMM.
++ */
++ for (; Receiver < 8; Receiver++) {
++ if ((Receiver & 0x1) == 0) {
++ /* Even rank of DIMM */
++ if(mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver+1))
++ dual_rank = 1;
++ else
++ dual_rank = 0;
++ }
++
++ if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver)) {
++ continue;
++ }
++
++ /* Select the base test address for the current rank */
++ TestAddr = mct_GetMCTSysAddr_D(pMCTstat, pDCTstat, Channel, Receiver, &valid);
++ if (!valid) { /* Address not supported on current CS */
++ continue;
++ }
++
++ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 14 TestAddr ", TestAddr, 4);
++ SetUpperFSbase(TestAddr); /* fs:eax=far ptr to target */
++
++ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 12 Receiver ", Receiver, 2);
++
++ /* 2.8.9.9.3 (DRAM Write Data Timing Loop)
++ * Iterate over all possible DQS delay values (0x0 - 0x7f)
++ */
++ uint8_t test_write_dqs_delay = 0;
++ uint8_t test_read_dqs_delay = 0;
++ uint8_t passing_dqs_delay_found[MAX_BYTE_LANES];
++
++ /* Initialize variables */
++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
++ current_write_dqs_delay[lane] = 0;
++ passing_dqs_delay_found[lane] = 0;
++ write_dqs_delay_stepping_done[lane] = 0;
++ }
++
++ for (test_write_dqs_delay = 0; test_write_dqs_delay < 128; test_write_dqs_delay++) {
++ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 16 test_write_dqs_delay ", test_write_dqs_delay, 6);
++
++ /* Break out of loop if a passing window was already found on every data lane (0 - 7) */
++ if (write_dqs_delay_stepping_done[0] && write_dqs_delay_stepping_done[1]
++ && write_dqs_delay_stepping_done[2] && write_dqs_delay_stepping_done[3]
++ && write_dqs_delay_stepping_done[4] && write_dqs_delay_stepping_done[5]
++ && write_dqs_delay_stepping_done[6] && write_dqs_delay_stepping_done[7])
+ break;
++
++ /* Commit the current Write Data Timing settings to the hardware registers */
++ write_dqs_write_data_timing_registers(current_write_dqs_delay, dev, (Receiver >> 1), index_reg);
++
++ /* Write the DRAM training pattern to the base test address */
++ WriteDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8);
++
++ /* 2.8.9.9.3 (DRAM Read DQS Timing Control Loop)
++ * Iterate over all possible DQS delay values (0x0 - 0x3f)
++ */
++ for (test_read_dqs_delay = 0; test_read_dqs_delay < 64; test_read_dqs_delay++) {
++ print_debug_dqs("\t\t\t\t\tTrainDQSRdWrPos: 161 test_read_dqs_delay ", test_read_dqs_delay, 6);
++
++ /* Initialize Read DQS Timing Control settings for this iteration */
++ for (lane = 0; lane < MAX_BYTE_LANES; lane++)
++ if (!write_dqs_delay_stepping_done[lane])
++ current_read_dqs_delay[lane] = test_read_dqs_delay;
++
++ /* Commit the current Read DQS Timing Control settings to the hardware registers */
++ write_dqs_read_data_timing_registers(current_read_dqs_delay, dev, (Receiver >> 1), index_reg);
++
++ /* Initialize test result variable */
++ bytelane_test_results = 0xff;
++
++ /* Read the DRAM training pattern back from the base test address
++ * NOTE
++ * The BKDG states to read the pattern three times, but this is probably excessive!
++ * Decrease training time by only reading the test pattern once per iteration
++ */
++ for (iter = 0; iter < 1; iter++) {
++ /* Flush caches */
++ SetTargetWTIO_D(TestAddr);
++ FlushDQSTestPattern_D(pDCTstat, TestAddr << 8);
++ ResetTargetWTIO_D();
++
++ /* Read and compare pattern */
++ bytelane_test_results &= (CompareDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8) & 0xff); /* [Lane 7 :: Lane 0] 0=fail, 1=pass */
++
++ /* If all lanes have already failed testing bypass remaining re-read attempt(s) */
++ if (bytelane_test_results == 0x0)
++ break;
++ }
++
++ /* Store any lanes that passed testing for later use */
++ for (lane = 0; lane < 8; lane++)
++ if (!write_dqs_delay_stepping_done[lane])
++ dqs_read_results_array[Receiver & 0x1][lane][test_read_dqs_delay] = (!!(bytelane_test_results & (1 << lane)));
++
++ print_debug_dqs("\t\t\t\t\tTrainDQSRdWrPos: 162 bytelane_test_results ", bytelane_test_results, 6);
++ }
++
++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
++ if (write_dqs_delay_stepping_done[lane])
++ continue;
++
++ /* Determine location and length of longest consecutive string of passing values
++ * Output is stored in best_pos and best_count
++ */
++ last_pos = 0;
++ cur_count = 0;
++ best_pos = 0;
++ best_count = 0;
++ for (iter = 0; iter < 64; iter++) {
++ if ((dqs_read_results_array[Receiver & 0x1][lane][iter]) && (iter < 63)) {
++ /* Pass */
++ cur_count++;
++ } else {
++ /* Failure or end of loop */
++ if (cur_count > best_count) {
++ best_count = cur_count;
++ best_pos = last_pos;
++ }
++ cur_count = 0;
++ last_pos = iter;
++ }
++ }
++
++ if (best_count > 2) {
++ /* Exit the DRAM Write Data Timing Loop after programming the Read DQS Timing Control
++ * register with the center of the passing window
++ */
++ current_read_dqs_delay[lane] = (best_pos + (best_count / 2));
++ passing_dqs_delay_found[lane] = 1;
++
++ /* Commit the current Read DQS Timing Control settings to the hardware registers */
++ write_dqs_read_data_timing_registers(current_read_dqs_delay, dev, (Receiver >> 1), index_reg);
++
++ /* Exit the DRAM Write Data Timing Loop */
++ write_dqs_delay_stepping_done[lane] = 1;
++
++ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 142 largest passing region ", best_count, 4);
++ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 143 largest passing region start ", best_pos, 4);
++ }
++
++ /* Increment the DQS Write Delay value if needed for the next DRAM Write Data Timing Loop iteration */
++ if (!write_dqs_delay_stepping_done[lane])
++ current_write_dqs_delay[lane]++;
++ }
+ }
+- Errors |= pDCTstat->TrainErrors;
+- }
+
+- pDCTstat->DqsRdWrPos_Saved = 0;
+- if (DQSWrDelay < dqsWrDelay_end) {
+- Errors = 0;
++ /* Flag failure(s) if present */
++ for (lane = 0; lane < 8; lane++) {
++ if (!passing_dqs_delay_found[lane]) {
++ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 121 Unable to find passing region for lane ", lane, 2);
++
++ /* Flag absence of passing window */
++ Errors |= 1 << SB_NODQSPOS;
++ }
++ }
++
++ /* Iterate over all possible Write Data Timing values (0x0 - 0x7f)
++ * Note that the Read DQS Timing Control was calibrated / centered in the prior nested loop
++ */
++ for (test_write_dqs_delay = 0; test_write_dqs_delay < 128; test_write_dqs_delay++) {
++ /* Initialize Write Data Timing settings for this iteration */
++ for (lane = 0; lane < MAX_BYTE_LANES; lane++)
++ current_write_dqs_delay[lane] = test_write_dqs_delay;
++
++ /* Commit the current Write Data Timing settings to the hardware registers */
++ write_dqs_write_data_timing_registers(current_write_dqs_delay, dev, (Receiver >> 1), index_reg);
++
++ /* Write the DRAM training pattern to the base test address */
++ WriteDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8);
++
++ /* Flush caches */
++ SetTargetWTIO_D(TestAddr);
++ FlushDQSTestPattern_D(pDCTstat, TestAddr << 8);
++ ResetTargetWTIO_D();
++
++ /* Read and compare pattern from the base test address */
++ bytelane_test_results = (CompareDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8) & 0xff); /* [Lane 7 :: Lane 0] 0=fail, 1=pass */
++
++ /* Store any lanes that passed testing for later use */
++ for (lane = 0; lane < 8; lane++)
++ dqs_write_results_array[Receiver & 0x1][lane][test_write_dqs_delay] = (!!(bytelane_test_results & (1 << lane)));
++ }
++
++ for (lane = 0; lane < 8; lane++) {
++ if ((!dual_rank) || (dual_rank && (Receiver & 0x1))) {
++
++#ifdef PRINT_PASS_FAIL_BITMAPS
++ for (iter = 0; iter < 64; iter++) {
++ if (dqs_read_results_array[0][lane][iter])
++ printk(BIOS_DEBUG, "+");
++ else
++ printk(BIOS_DEBUG, ".");
++ }
++ printk(BIOS_DEBUG, "\n");
++ for (iter = 0; iter < 64; iter++) {
++ if (dqs_read_results_array[1][lane][iter])
++ printk(BIOS_DEBUG, "+");
++ else
++ printk(BIOS_DEBUG, ".");
++ }
++ printk(BIOS_DEBUG, "\n\n");
++ for (iter = 0; iter < 128; iter++) {
++ if (dqs_write_results_array[0][lane][iter])
++ printk(BIOS_DEBUG, "+");
++ else
++ printk(BIOS_DEBUG, ".");
++ }
++ printk(BIOS_DEBUG, "\n");
++ for (iter = 0; iter < 128; iter++) {
++ if (dqs_write_results_array[1][lane][iter])
++ printk(BIOS_DEBUG, "+");
++ else
++ printk(BIOS_DEBUG, ".");
++ }
++ printk(BIOS_DEBUG, "\n\n");
++#endif
++
++ /* Base rank of single-rank DIMM, or odd rank of dual-rank DIMM */
++ if (dual_rank) {
++ /* Intersect the passing windows of both ranks */
++ for (iter = 0; iter < 64; iter++)
++ if (!dqs_read_results_array[1][lane][iter])
++ dqs_read_results_array[0][lane][iter] = 0;
++ for (iter = 0; iter < 128; iter++)
++ if (!dqs_write_results_array[1][lane][iter])
++ dqs_write_results_array[0][lane][iter] = 0;
++ }
++
++ /* Determine location and length of longest consecutive string of passing values for read DQS timing
++ * Output is stored in best_pos and best_count
++ */
++ last_pos = 0;
++ cur_count = 0;
++ best_pos = 0;
++ best_count = 0;
++ for (iter = 0; iter < 64; iter++) {
++ if ((dqs_read_results_array[0][lane][iter]) && (iter < 63)) {
++ /* Pass */
++ cur_count++;
++ } else {
++ /* Failure or end of loop */
++ if (cur_count > best_count) {
++ best_count = cur_count;
++ best_pos = last_pos;
++ }
++ cur_count = 0;
++ last_pos = iter;
++ }
++ }
++ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 144 largest read passing region ", best_count, 4);
++ if (best_count > 0) {
++ if (best_count < MIN_DQS_WNDW) {
++ /* Flag excessively small passing window */
++ Errors |= 1 << SB_SMALLDQS;
++ }
++
++ /* Find the center of the passing window */
++ current_read_dqs_delay[lane] = (best_pos + (best_count / 2));
++
++ /* Commit the current Read DQS Timing Control settings to the hardware registers */
++ write_dqs_read_data_timing_registers(current_read_dqs_delay, dev, (Receiver >> 1), index_reg);
++
++ /* Save the final Read DQS Timing Control settings for later use */
++ pDCTstat->CH_D_DIR_B_DQS[Channel][Receiver >> 1][DQS_READDIR][lane] = current_read_dqs_delay[lane];
++ } else {
++ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 122 Unable to find read passing region for lane ", lane, 2);
++
++ /* Flag absence of passing window */
++ Errors |= 1 << SB_NODQSPOS;
++ }
++
++ /* Determine location and length of longest consecutive string of passing values for write DQS timing
++ * Output is stored in best_pos and best_count
++ */
++ last_pos = 0;
++ cur_count = 0;
++ best_pos = 0;
++ best_count = 0;
++ for (iter = 0; iter < 128; iter++) {
++ if ((dqs_write_results_array[0][lane][iter]) && (iter < 127)) {
++ /* Pass */
++ cur_count++;
++ } else {
++ /* Failure or end of loop */
++ if (cur_count > best_count) {
++ best_count = cur_count;
++ best_pos = last_pos;
++ }
++ cur_count = 0;
++ last_pos = iter;
++ }
++ }
++ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 145 largest write passing region ", best_count, 4);
++ if (best_count > 0) {
++ if (best_count < MIN_DQS_WNDW) {
++ /* Flag excessively small passing window */
++ Errors |= 1 << SB_SMALLDQS;
++ }
++
++ /* Find the center of the passing window */
++ current_write_dqs_delay[lane] = (best_pos + (best_count / 2));
++
++ /* Commit the current Write Data Timing settings to the hardware registers */
++ write_dqs_write_data_timing_registers(current_write_dqs_delay, dev, (Receiver >> 1), index_reg);
++
++ /* Save the final Write Data Timing settings for later use */
++ pDCTstat->CH_D_DIR_B_DQS[Channel][Receiver >> 1][DQS_WRITEDIR][lane] = current_write_dqs_delay[lane];
++ } else {
++ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 123 Unable to find write passing region for lane ", lane, 2);
++
++ /* Flag absence of passing window */
++ Errors |= 1 << SB_NODQSPOS;
++ }
++ }
++ }
+
+- print_debug_dqs("\tTrainDQSRdWrPos: 231 DQSWrDelay ", DQSWrDelay, 1);
+- TrainWriteDQS_D(pMCTstat, pDCTstat, cs_start);
+ }
+- print_debug_dqs("\tTrainDQSRdWrPos: 232 Errors ", Errors, 1);
+- pDCTstat->ErrStatus |= Errors;
+ }
+
++ pDCTstat->TrainErrors |= Errors;
++ pDCTstat->ErrStatus |= Errors;
++
+ #if DQS_TRAIN_DEBUG > 0
+ {
+ u8 val;
+ u8 i;
+- u8 Channel, Receiver, Dir;
++ u8 ChannelDTD, ReceiverDTD, Dir;
+ u8 *p;
+
+ for (Dir = 0; Dir < 2; Dir++) {
+@@ -375,14 +749,14 @@ static void TrainDQSRdWrPos_D(struct MCTStatStruc *pMCTstat,
+ } else {
+ printk(BIOS_DEBUG, "TrainDQSRdWrPos: CH_D_DIR_B_DQS RD:\n");
+ }
+- for (Channel = 0; Channel < 2; Channel++) {
+- printk(BIOS_DEBUG, "Channel: %02x\n", Channel);
+- for (Receiver = cs_start; Receiver < (cs_start + 2); Receiver += 2) {
+- printk(BIOS_DEBUG, "\t\tReceiver: %02x: ", Receiver);
+- p = pDCTstat->CH_D_DIR_B_DQS[Channel][Receiver >> 1][Dir];
++ for (ChannelDTD = 0; ChannelDTD < 2; ChannelDTD++) {
++ printk(BIOS_DEBUG, "Channel: %02x\n", ChannelDTD);
++ for (ReceiverDTD = 0; ReceiverDTD < MAX_CS_SUPPORTED; ReceiverDTD += 2) {
++ printk(BIOS_DEBUG, "\t\tReceiver: %02x:", ReceiverDTD);
++ p = pDCTstat->CH_D_DIR_B_DQS[ChannelDTD][ReceiverDTD >> 1][Dir];
+ for (i=0;i<8; i++) {
+ val = p[i];
+- printk(BIOS_DEBUG, "%02x ", val);
++ printk(BIOS_DEBUG, " %02x", val);
+ }
+ printk(BIOS_DEBUG, "\n");
+ }
+@@ -437,225 +811,6 @@ static void SetupDqsPattern_D(struct MCTStatStruc *pMCTstat,
+ pDCTstat->PtrPatternBufA = (u32)buf;
+ }
+
+-static void TrainDQSPos_D(struct MCTStatStruc *pMCTstat,
+- struct DCTStatStruc *pDCTstat,
+- u8 cs_start)
+-{
+- u32 Errors;
+- u8 ChipSel, DQSDelay;
+- u8 RnkDlySeqPassMin=0, RnkDlySeqPassMax=0xFF, RnkDlyFilterMin=0, RnkDlyFilterMax=0xFF;
+- u8 RnkDlySeqPassMinTot=0, RnkDlySeqPassMaxTot=0xFF, RnkDlyFilterMinTot=0, RnkDlyFilterMaxTot=0xFF;
+- u8 LastTest ,LastTestTot;
+- u32 TestAddr;
+- u8 ByteLane;
+- u8 MutualCSPassW[128];
+- u8 BanksPresent;
+- u8 dqsDelay_end;
+- u8 tmp, valid, tmp1;
+- u16 word;
+-
+- /* MutualCSPassW: each byte represents a bitmap of pass/fail per
+- * ByteLane. The indext within MutualCSPassW is the delay value
+- * given the results.
+- */
+- print_debug_dqs("\t\t\tTrainDQSPos begin ", 0, 3);
+-
+- Errors = 0;
+- BanksPresent = 0;
+-
+- dqsDelay_end = 32;
+- /* Bitmapped status per delay setting, 0xff=All positions
+- * passing (1= PASS). Set the entire array.
+- */
+- for (DQSDelay=0; DQSDelay<128; DQSDelay++) {
+- MutualCSPassW[DQSDelay] = 0xFF;
+- }
+-
+- for (ChipSel = cs_start; ChipSel < (cs_start + 2); ChipSel++) { /* logical register chipselects 0..7 */
+- print_debug_dqs("\t\t\t\tTrainDQSPos: 11 ChipSel ", ChipSel, 4);
+-
+- if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, pDCTstat->Channel, ChipSel)) {
+- print_debug_dqs("\t\t\t\tmct_RcvrRankEnabled_D CS not enabled ", ChipSel, 4);
+- continue;
+- }
+-
+- BanksPresent = 1; /* flag for at least one bank is present */
+- TestAddr = mct_GetMCTSysAddr_D(pMCTstat, pDCTstat, pDCTstat->Channel, ChipSel, &valid);
+- if (!valid) {
+- print_debug_dqs("\t\t\t\tAddress not supported on current CS ", TestAddr, 4);
+- continue;
+- }
+-
+- print_debug_dqs("\t\t\t\tTrainDQSPos: 12 TestAddr ", TestAddr, 4);
+- SetUpperFSbase(TestAddr); /* fs:eax=far ptr to target */
+-
+- if (pDCTstat->Direction == DQS_READDIR) {
+- print_debug_dqs("\t\t\t\tTrainDQSPos: 13 for read ", 0, 4);
+- WriteDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8);
+- }
+-
+- for (DQSDelay = 0; DQSDelay < dqsDelay_end; DQSDelay++) {
+- print_debug_dqs("\t\t\t\t\tTrainDQSPos: 141 DQSDelay ", DQSDelay, 5);
+-
+- tmp = 0xFF;
+- tmp1 = DQSDelay;
+- if (pDCTstat->Direction == DQS_READDIR) {
+- tmp &= MutualCSPassW[DQSDelay];
+- tmp1 += dqsDelay_end;
+- }
+- tmp &= MutualCSPassW[tmp1];
+-
+- if (tmp == 0) {
+- continue;/* skip current delay value if other chipselects have failed all 8 bytelanes */
+- }
+-
+- pDCTstat->DQSDelay = DQSDelay;
+- mct_SetDQSDelayAllCSR_D(pMCTstat, pDCTstat, cs_start);
+- print_debug_dqs("\t\t\t\t\tTrainDQSPos: 142 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
+-
+- if (pDCTstat->Direction == DQS_WRITEDIR) {
+- print_debug_dqs("\t\t\t\t\tTrainDQSPos: 143 for write", 0, 5);
+- WriteDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8);
+- }
+-
+- print_debug_dqs("\t\t\t\t\tTrainDQSPos: 144 Pattern ", pDCTstat->Pattern, 5);
+- ReadDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8);
+- /* print_debug_dqs("\t\t\t\t\tTrainDQSPos: 145 MutualCSPassW ", MutualCSPassW[DQSDelay], 5); */
+- word = CompareDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8); /* 0=fail, 1=pass */
+- print_debug_dqs("\t\t\t\t\tTrainDQSPos: 144 compare 1 ", word, 3);
+-
+- print_debug_dqs("\t\t\t\t\tTrainDQSPos: 144 DqsRdWrPos_Saved ", pDCTstat->DqsRdWrPos_Saved, 3);
+- word &= ~(pDCTstat->DqsRdWrPos_Saved); /* mask out bytelanes that already passed */
+- word &= ~(pDCTstat->DqsRdWrPos_Saved << 8);
+- print_debug_dqs("\t\t\t\t\tTrainDQSPos: 144 compare 2 ", word, 3);
+-
+- tmp = DQSDelay;
+- if (pDCTstat->Direction == DQS_READDIR) {
+- MutualCSPassW[tmp] &= word >> 8;
+- tmp += dqsDelay_end;
+- }
+- MutualCSPassW[tmp] &= word & 0xFF;
+-
+- print_debug_dqs("\t\t\t\t\tTrainDQSPos: 146 \tMutualCSPassW ", MutualCSPassW[DQSDelay], 5);
+-
+- SetTargetWTIO_D(TestAddr);
+- FlushDQSTestPattern_D(pDCTstat, TestAddr << 8);
+- ResetTargetWTIO_D();
+- }
+-
+- }
+-
+- if (pDCTstat->Direction == DQS_READDIR) {
+- dqsDelay_end <<= 1;
+- }
+-
+- if (BanksPresent) {
+- #if 0 /* show the bitmap */
+- for (ByteLane = 0; ByteLane < 8; ByteLane++) { /* just print ByteLane 0 */
+- for (DQSDelay = 0; DQSDelay < dqsDelay_end; DQSDelay++) {
+- if (!(MutualCSPassW[DQSDelay] &(1 << ByteLane))) {
+- printk(BIOS_DEBUG, ".");
+- } else {
+- printk(BIOS_DEBUG, "*");
+- }
+- }
+- printk(BIOS_DEBUG, "\n");
+- }
+- #endif
+- for (ByteLane = 0; ByteLane < 8; ByteLane++) {
+- print_debug_dqs("\t\t\t\tTrainDQSPos: 31 ByteLane ",ByteLane, 4);
+- if (!(pDCTstat->DqsRdWrPos_Saved &(1 << ByteLane))) {
+- pDCTstat->ByteLane = ByteLane;
+- LastTest = DQS_FAIL; /* Analyze the results */
+- LastTestTot = DQS_FAIL;
+- /* RnkDlySeqPassMin = 0; */
+- /* RnkDlySeqPassMax = 0; */
+- RnkDlyFilterMax = 0;
+- RnkDlyFilterMin = 0;
+- RnkDlyFilterMaxTot = 0;
+- RnkDlyFilterMinTot = 0;
+- for (DQSDelay = 0; DQSDelay < dqsDelay_end; DQSDelay++) {
+- if (MutualCSPassW[DQSDelay] & (1 << ByteLane)) {
+- print_debug_dqs("\t\t\t\t\tTrainDQSPos: 321 DQSDelay ", DQSDelay, 5);
+- print_debug_dqs("\t\t\t\t\tTrainDQSPos: 322 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
+- if (pDCTstat->Direction == DQS_READDIR)
+- tmp = 0x20;
+- else
+- tmp = 0;
+- if (DQSDelay >= tmp) {
+- RnkDlySeqPassMax = DQSDelay;
+- if (LastTest == DQS_FAIL) {
+- RnkDlySeqPassMin = DQSDelay; /* start sequential run */
+- }
+- if ((RnkDlySeqPassMax - RnkDlySeqPassMin)>(RnkDlyFilterMax-RnkDlyFilterMin)){
+- RnkDlyFilterMin = RnkDlySeqPassMin;
+- RnkDlyFilterMax = RnkDlySeqPassMax;
+- }
+- LastTest = DQS_PASS;
+- }
+-
+- if (pDCTstat->Direction == DQS_READDIR) {
+- RnkDlySeqPassMaxTot = DQSDelay;
+- if (LastTestTot == DQS_FAIL)
+- RnkDlySeqPassMinTot = DQSDelay;
+- if ((RnkDlySeqPassMaxTot - RnkDlySeqPassMinTot)>(RnkDlyFilterMaxTot-RnkDlyFilterMinTot)){
+- RnkDlyFilterMinTot = RnkDlySeqPassMinTot;
+- RnkDlyFilterMaxTot = RnkDlySeqPassMaxTot;
+- }
+- LastTestTot = DQS_PASS;
+- }
+- } else {
+- LastTest = DQS_FAIL;
+- LastTestTot = DQS_FAIL;
+- }
+- }
+- print_debug_dqs("\t\t\t\tTrainDQSPos: 33 RnkDlySeqPassMax ", RnkDlySeqPassMax, 4);
+- if (RnkDlySeqPassMax == 0) {
+- Errors |= 1 << SB_NODQSPOS; /* no passing window */
+- } else {
+- print_debug_dqs_pair("\t\t\t\tTrainDQSPos: 34 RnkDlyFilter: ", RnkDlyFilterMin, " ", RnkDlyFilterMax, 4);
+- if (((RnkDlyFilterMax - RnkDlyFilterMin) < MIN_DQS_WNDW)){
+- Errors |= 1 << SB_SMALLDQS;
+- } else {
+- u8 middle_dqs;
+- /* mctEngDQSwindow_Save_D Not required for arrays */
+- if (pDCTstat->Direction == DQS_READDIR)
+- middle_dqs = MiddleDQS_D(RnkDlyFilterMinTot, RnkDlyFilterMaxTot);
+- else
+- middle_dqs = MiddleDQS_D(RnkDlyFilterMin, RnkDlyFilterMax);
+- pDCTstat->DQSDelay = middle_dqs;
+- mct_SetDQSDelayCSR_D(pMCTstat, pDCTstat, cs_start); /* load the register with the value */
+- if (pDCTstat->Direction == DQS_READDIR)
+- StoreWrRdDQSDatStrucVal_D(pMCTstat, pDCTstat, cs_start, RnkDlyFilterMinTot, RnkDlyFilterMaxTot); /* store the value into the data structure */
+- else
+- StoreWrRdDQSDatStrucVal_D(pMCTstat, pDCTstat, cs_start, RnkDlyFilterMin, RnkDlyFilterMax); /* store the value into the data structure */
+- print_debug_dqs("\t\t\t\tTrainDQSPos: 42 middle_dqs : ",middle_dqs, 4);
+- pDCTstat->DqsRdWrPos_Saved |= 1 << ByteLane;
+- }
+- }
+- }
+- } /* if (pDCTstat->DqsRdWrPos_Saved &(1 << ByteLane)) */
+- }
+-/* skipLocMiddle: */
+- pDCTstat->TrainErrors = Errors;
+-
+- print_debug_dqs("\t\t\tTrainDQSPos: Errors ", Errors, 3);
+-}
+-
+-static void mctEngDQSwindow_Save_D(struct MCTStatStruc *pMCTstat,
+- struct DCTStatStruc *pDCTstat, u8 ChipSel,
+- u8 RnkDlyFilterMin, u8 RnkDlyFilterMax)
+-{
+- pDCTstat->CH_D_DIR_MaxMin_B_Dly[pDCTstat->Channel]
+- [pDCTstat->Direction]
+- [0]
+- [pDCTstat->ByteLane] = RnkDlyFilterMin;
+- pDCTstat->CH_D_DIR_MaxMin_B_Dly[pDCTstat->Channel]
+- [pDCTstat->Direction]
+- [1]
+- [pDCTstat->ByteLane] = RnkDlyFilterMax;
+-}
+-
+ static void StoreDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 ChipSel)
+ {
+@@ -679,26 +834,6 @@ static void StoreDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat,
+ pDCTstat->DQSDelay;
+ }
+
+-static void StoreWrRdDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat,
+- struct DCTStatStruc *pDCTstat, u8 ChipSel,
+- u8 RnkDlyFilterMin, u8 RnkDlyFilterMax)
+-{
+- u8 dn;
+-
+- if (pDCTstat->Direction == DQS_WRITEDIR) {
+- dn = ChipSel >> 1;
+- RnkDlyFilterMin += pDCTstat->CH_D_B_TxDqs[pDCTstat->Channel][dn][pDCTstat->ByteLane];
+- RnkDlyFilterMax += pDCTstat->CH_D_B_TxDqs[pDCTstat->Channel][dn][pDCTstat->ByteLane];
+- pDCTstat->DQSDelay += pDCTstat->CH_D_B_TxDqs[pDCTstat->Channel][dn][pDCTstat->ByteLane];
+- } else {
+- RnkDlyFilterMin <<= 1;
+- RnkDlyFilterMax <<= 1;
+- pDCTstat->DQSDelay <<= 1;
+- }
+- mctEngDQSwindow_Save_D(pMCTstat, pDCTstat, ChipSel, RnkDlyFilterMin, RnkDlyFilterMax);
+- StoreDQSDatStrucVal_D(pMCTstat, pDCTstat, ChipSel);
+-}
+-
+ static void GetDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 ChipSel)
+ {
+@@ -720,33 +855,6 @@ static void GetDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat,
+
+ /* FindDQSDatDimmVal_D is not required since we use an array */
+
+-static u8 MiddleDQS_D(u8 min, u8 max)
+-{
+- u8 size;
+- size = max-min;
+- if (size % 2)
+- size++; /* round up if the size isn't even. */
+- return ( min + (size >> 1));
+-}
+-
+-static void TrainReadDQS_D(struct MCTStatStruc *pMCTstat,
+- struct DCTStatStruc *pDCTstat,
+- u8 cs_start)
+-{
+- print_debug_dqs("\t\tTrainReadPos ", 0, 2);
+- pDCTstat->Direction = DQS_READDIR;
+- TrainDQSPos_D(pMCTstat, pDCTstat, cs_start);
+-}
+-
+-static void TrainWriteDQS_D(struct MCTStatStruc *pMCTstat,
+- struct DCTStatStruc *pDCTstat,
+- u8 cs_start)
+-{
+- pDCTstat->Direction = DQS_WRITEDIR;
+- print_debug_dqs("\t\tTrainWritePos", 0, 2);
+- TrainDQSPos_D(pMCTstat, pDCTstat, cs_start);
+-}
+-
+ static void proc_IOCLFLUSH_D(u32 addr_hi)
+ {
+ SetTargetWTIO_D(addr_hi);
+@@ -963,30 +1071,6 @@ static void ResetTargetWTIO_D(void)
+ _WRMSR(0xc0010017, lo, hi); /* IORR0 Mask */
+ }
+
+-static void ReadDQSTestPattern_D(struct MCTStatStruc *pMCTstat,
+- struct DCTStatStruc *pDCTstat,
+- u32 TestAddr_lo)
+-{
+- /* Read a pattern of 72 bit times (per DQ), to test dram functionality.
+- * The pattern is a stress pattern which exercises both ISI and
+- * crosstalk. The number of cache lines to fill is dependent on DCT
+- * width mode and burstlength.
+- * Mode BL Lines Pattern no.
+- * ----+---+-------------------
+- * 64 4 9 0
+- * 64 8 9 0
+- * 64M 4 9 0
+- * 64M 8 9 0
+- * 128 4 18 1
+- * 128 8 N/A -
+- */
+- if (pDCTstat->Pattern == 0)
+- ReadL9TestPattern(TestAddr_lo);
+- else
+- ReadL18TestPattern(TestAddr_lo);
+- _MFENCE;
+-}
+-
+ u32 SetUpperFSbase(u32 addr_hi)
+ {
+ /* Set the upper 32-bits of the Base address, 4GB aligned) for the
+@@ -1009,8 +1093,6 @@ void ResetDCTWrPtr_D(u32 dev, u32 index_reg, u32 index)
+ Set_NB32_index_wait(dev, index_reg, index, val);
+ }
+
+-/* mctEngDQSwindow_Save_D not required with arrays */
+-
+ void mct_TrainDQSPos_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstatA)
+ {
+@@ -1021,8 +1103,8 @@ void mct_TrainDQSPos_D(struct MCTStatStruc *pMCTstat,
+ for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
+ pDCTstat = pDCTstatA + Node;
+ if (pDCTstat->DCTSysLimit) {
++ TrainDQSRdWrPos_D(pMCTstat, pDCTstat);
+ for (ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) {
+- TrainDQSRdWrPos_D(pMCTstat, pDCTstat, ChipSel);
+ SetEccDQSRdWrPos_D(pMCTstat, pDCTstat, ChipSel);
+ }
+ }
+@@ -1137,27 +1219,6 @@ static void mct_SetDQSDelayCSR_D(struct MCTStatStruc *pMCTstat,
+ }
+ }
+
+-/*
+- * mct_SetDQSDelayAllCSR_D:
+- * Write the Delay value to all eight byte lanes.
+- */
+-static void mct_SetDQSDelayAllCSR_D(struct MCTStatStruc *pMCTstat,
+- struct DCTStatStruc *pDCTstat,
+- u8 cs_start)
+-{
+- u8 ByteLane;
+- u8 ChipSel = cs_start;
+-
+- for (ChipSel = cs_start; ChipSel < (cs_start + 2); ChipSel++) {
+- if ( mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, pDCTstat->Channel, ChipSel)) {
+- for (ByteLane = 0; ByteLane < 8; ByteLane++) {
+- pDCTstat->ByteLane = ByteLane;
+- mct_SetDQSDelayCSR_D(pMCTstat, pDCTstat, ChipSel);
+- }
+- }
+- }
+-}
+-
+ u8 mct_RcvrRankEnabled_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat,
+ u8 Channel, u8 ChipSel)
+@@ -1196,7 +1257,7 @@ u32 mct_GetMCTSysAddr_D(struct MCTStatStruc *pMCTstat,
+ reg = 0x40 + (receiver << 2) + reg_off;
+ val = Get_NB32(dev, reg);
+
+- val &= ~0x0F;
++ val &= ~0xe007c01f;
+
+ /* unganged mode DCT0+DCT1, sys addr of DCT1=node
+ * base+DctSelBaseAddr+local ca base*/
+@@ -1277,6 +1338,7 @@ exitGetAddrWNoError:
+ print_debug_dqs("mct_GetMCTSysAddr_D: base_addr ", val, 2);
+ print_debug_dqs("mct_GetMCTSysAddr_D: valid ", *valid, 2);
+ print_debug_dqs("mct_GetMCTSysAddr_D: status ", pDCTstat->Status, 2);
++ print_debug_dqs("mct_GetMCTSysAddr_D: SysBase ", pDCTstat->DCTSysBase, 2);
+ print_debug_dqs("mct_GetMCTSysAddr_D: HoleBase ", pDCTstat->DCTHoleBase, 2);
+ print_debug_dqs("mct_GetMCTSysAddr_D: Cachetop ", pMCTstat->Sub4GCacheTop, 2);
+
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c b/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c
+index 528c782..60bc01d 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c
+@@ -2,6 +2,7 @@
+ * This file is part of the coreboot project.
+ *
+ * Copyright (C) 2010 Advanced Micro Devices, Inc.
++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+@@ -25,7 +26,6 @@ static void EnableZQcalibration(struct MCTStatStruc *pMCTstat, struct DCTStatStr
+ static void DisableZQcalibration(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat);
+ static void PrepareC_MCT(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat);
+ static void PrepareC_DCT(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct);
+-static void MultiplyDelay(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct);
+ static void Restore_OnDimmMirror(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat);
+ static void Clear_OnDimmMirror(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat);
+
+@@ -154,7 +154,6 @@ static void PhyWLPass2(struct MCTStatStruc *pMCTstat,
+ Clear_OnDimmMirror(pMCTstat, pDCTstat);
+ SetDllSpeedUp_D(pMCTstat, pDCTstat, dct);
+ DisableAutoRefresh_D(pMCTstat, pDCTstat);
+- MultiplyDelay(pMCTstat, pDCTstat, dct);
+ for (dimm = 0; dimm < MAX_DIMMS_SUPPORTED; dimm ++) {
+ if (DIMMValid & (1 << (dimm << 1)))
+ AgesaHwWlPhase1(pDCTstat->C_MCTPtr, pDCTstat->C_DCTPtr[dct], dimm, SecondPass);
+@@ -162,6 +161,9 @@ static void PhyWLPass2(struct MCTStatStruc *pMCTstat,
+ }
+ }
+
++/* Write Levelization Training
++ * Algorithm detailed in the Fam10h BKDG Rev. 3.62 section 2.8.9.9.1
++ */
+ static void WriteLevelization_HW(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat)
+ {
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctmtr_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mctmtr_d.c
+index 3d625de..596fb23 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mctmtr_d.c
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctmtr_d.c
+@@ -2,6 +2,7 @@
+ * This file is part of the coreboot project.
+ *
+ * Copyright (C) 2010 Advanced Micro Devices, Inc.
++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+@@ -201,12 +202,13 @@ static void SetMTRRrange_D(u32 Base, u32 *pLimit, u32 *pMtrrAddr, u16 MtrrType)
+
+ void UMAMemTyping_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA)
+ {
+-/* UMA memory size may need splitting the MTRR configuration into two
+- Before training use NB_BottomIO or the physical memory size to set the MTRRs.
+- After training, add UMAMemTyping function to reconfigure the MTRRs based on
+- NV_BottomUMA (for UMA systems only).
+- This two-step process allows all memory to be cached for training
+-*/
++ /* UMA memory size may need splitting the MTRR configuration into two
++ * Before training use NB_BottomIO or the physical memory size to set the MTRRs.
++ * After training, add UMAMemTyping function to reconfigure the MTRRs based on
++ * NV_BottomUMA (for UMA systems only).
++ * This two-step process allows all memory to be cached for training
++ */
++
+ u32 Bottom32bIO, Cache32bTOP;
+ u32 val;
+ u32 addr;
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctndi_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mctndi_d.c
+index 013a1b9..6f97061 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mctndi_d.c
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctndi_d.c
+@@ -2,6 +2,7 @@
+ * This file is part of the coreboot project.
+ *
+ * Copyright (C) 2010 Advanced Micro Devices, Inc.
++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+@@ -140,7 +141,7 @@ void InterleaveNodes_D(struct MCTStatStruc *pMCTstat,
+ }
+
+ if (DoIntlv) {
+- MCTMemClr_D(pMCTstat,pDCTstatA);
++ MCTMemClr_D(pMCTstat, pDCTstatA);
+ /* Program Interleaving enabled on Node 0 map only.*/
+ MemSize0 <<= bsf(Nodes); /* MemSize=MemSize*2 (or 4, or 8) */
+ Dct0MemSize <<= bsf(Nodes);
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctproc.c b/src/northbridge/amd/amdmct/mct_ddr3/mctproc.c
+index da2f372..cda9c6b 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mctproc.c
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctproc.c
+@@ -2,6 +2,7 @@
+ * This file is part of the coreboot project.
+ *
+ * Copyright (C) 2010 Advanced Micro Devices, Inc.
++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+@@ -36,10 +37,10 @@ u32 mct_SetDramConfigMisc2(struct DCTStatStruc *pDCTstat, u8 dct, u32 misc2)
+ val = Get_NB32(pDCTstat->dev_dct, dct * 0x100 + 0x78);
+
+ val &= 7;
+- val = ((~val) & 0xFF) + 1;
++ val = ((~val) & 0xff) + 1;
+ val += 6;
+- val &= 0xFF;
+- misc2 &= 0xFFF8FFFF;
++ val &= 0x7;
++ misc2 &= 0xfff8ffff;
+ misc2 |= val << 16; /* DataTxFifoWrDly */
+ if (pDCTstat->LogicalCPUID & AMD_DR_Dx)
+ misc2 |= 1 << 7; /* ProgOdtEn */
+@@ -52,11 +53,15 @@ void mct_ExtMCTConfig_Cx(struct DCTStatStruc *pDCTstat)
+ u32 val;
+
+ if (pDCTstat->LogicalCPUID & (AMD_DR_Cx)) {
+- Set_NB32(pDCTstat->dev_dct, 0x11C, 0x0CE00FC0 | 1 << 29/* FlushWrOnStpGnt */);
++ /* Revision C */
++ Set_NB32(pDCTstat->dev_dct, 0x11c, 0x0ce00fc0 | 1 << 29/* FlushWrOnStpGnt */);
++ }
+
+- val = Get_NB32(pDCTstat->dev_dct, 0x1B0);
+- val &= 0xFFFFF8C0;
++ if (pDCTstat->LogicalCPUID & (AMD_DR_Cx)) {
++ val = Get_NB32(pDCTstat->dev_dct, 0x1b0);
++ val &= ~0x73f;
+ val |= 0x101; /* BKDG recommended settings */
+- Set_NB32(pDCTstat->dev_dct, 0x1B0, val);
++
++ Set_NB32(pDCTstat->dev_dct, 0x1b0, val);
+ }
+ }
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c b/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c
+index 6de2f4e..b21b96a 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c
+@@ -2,6 +2,7 @@
+ * This file is part of the coreboot project.
+ *
+ * Copyright (C) 2010 Advanced Micro Devices, Inc.
++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+@@ -172,6 +173,7 @@ static u32 mct_MR1(struct MCTStatStruc *pMCTstat,
+ ret |= 1 << 11;
+ }
+
++ /* program MrsAddress[12]=QOFF: based on F2x[1,0]84[Qoff] */
+ if (dword & (1 << 13))
+ ret |= 1 << 12;
+
+@@ -199,7 +201,8 @@ static u32 mct_MR0(struct MCTStatStruc *pMCTstat,
+ /* program MrsAddress[6:4,2]=read CAS latency
+ (CL):based on F2x[1,0]88[Tcl] */
+ dword2 = Get_NB32(dev, reg_off + 0x88);
+- ret |= (dword2 & 0xF) << 4; /* F2x88[3:0] to MrsAddress[6:4,2]=xxx0b */
++ ret |= (dword2 & 0x7) << 4; /* F2x88[2:0] to MrsAddress[6:4] */
++ ret |= ((dword2 & 0x8) >> 3) << 2; /* F2x88[3] to MrsAddress[2] */
+
+ /* program MrsAddress[12]=0 (PPD):slow exit */
+ if (dword & (1 << 23))
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c
+index 8e5c268..587c414 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c
+@@ -2,6 +2,7 @@
+ * This file is part of the coreboot project.
+ *
+ * Copyright (C) 2010 Advanced Micro Devices, Inc.
++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+@@ -24,25 +25,13 @@
+
+ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 Pass);
+-static u8 mct_SavePassRcvEnDly_D(struct DCTStatStruc *pDCTstat,
+- u8 rcvrEnDly, u8 Channel,
+- u8 receiver, u8 Pass);
+-static u8 mct_CompareTestPatternQW0_D(struct MCTStatStruc *pMCTstat,
+- struct DCTStatStruc *pDCTstat,
+- u32 addr, u8 channel,
+- u8 pattern, u8 Pass);
+ static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat);
+ static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 Channel);
+ static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 Channel);
+-static void mct_SetFinalRcvrEnDly_D(struct DCTStatStruc *pDCTstat,
+- u8 RcvrEnDly, u8 where,
+- u8 Channel, u8 Receiver,
+- u32 dev, u32 index_reg,
+- u8 Addl_Index, u8 Pass);
+-static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u8 DQSRcvEnDly);
++static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u16 DQSRcvEnDly);
+ static void fenceDynTraining_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 dct);
+ static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat);
+@@ -50,17 +39,17 @@ static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat);
+ /* Warning: These must be located so they do not cross a logical 16-bit
+ segment boundary! */
+ static const u32 TestPattern0_D[] = {
+- 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
+- 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
+- 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
+- 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
+-};
+-static const u32 TestPattern1_D[] = {
+ 0x55555555, 0x55555555, 0x55555555, 0x55555555,
+ 0x55555555, 0x55555555, 0x55555555, 0x55555555,
+ 0x55555555, 0x55555555, 0x55555555, 0x55555555,
+ 0x55555555, 0x55555555, 0x55555555, 0x55555555,
+ };
++static const u32 TestPattern1_D[] = {
++ 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
++ 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
++ 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
++ 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
++};
+ static const u32 TestPattern2_D[] = {
+ 0x12345678, 0x87654321, 0x23456789, 0x98765432,
+ 0x59385824, 0x30496724, 0x24490795, 0x99938733,
+@@ -104,16 +93,87 @@ void mct_TrainRcvrEn_D(struct MCTStatStruc *pMCTstat,
+ dqsTrainRcvrEn_SW(pMCTstat, pDCTstat, Pass);
+ }
+
++static void read_dqs_write_timing_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dimm, uint32_t index_reg)
++{
++ uint8_t lane;
++ uint32_t dword;
++
++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
++ uint32_t wdt_reg;
++ if ((lane == 0) || (lane == 1))
++ wdt_reg = 0x30;
++ if ((lane == 2) || (lane == 3))
++ wdt_reg = 0x31;
++ if ((lane == 4) || (lane == 5))
++ wdt_reg = 0x40;
++ if ((lane == 6) || (lane == 7))
++ wdt_reg = 0x41;
++ if (lane == 8)
++ wdt_reg = 0x32;
++ wdt_reg += dimm * 3;
++ dword = Get_NB32_index_wait(dev, index_reg, wdt_reg);
++ if ((lane == 7) || (lane == 5) || (lane == 3) || (lane == 1))
++ current_total_delay[lane] = (dword & 0x00ff0000) >> 16;
++ if ((lane == 8) || (lane == 6) || (lane == 4) || (lane == 2) || (lane == 0))
++ current_total_delay[lane] = dword & 0x000000ff;
++ }
++}
++
++static void write_dqs_receiver_enable_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dimm, uint32_t index_reg)
++{
++ uint8_t lane;
++ uint32_t dword;
++
++ for (lane = 0; lane < 8; lane++) {
++ uint32_t ret_reg;
++ if ((lane == 0) || (lane == 1))
++ ret_reg = 0x10;
++ if ((lane == 2) || (lane == 3))
++ ret_reg = 0x11;
++ if ((lane == 4) || (lane == 5))
++ ret_reg = 0x20;
++ if ((lane == 6) || (lane == 7))
++ ret_reg = 0x21;
++ ret_reg += dimm * 3;
++ dword = Get_NB32_index_wait(dev, index_reg, ret_reg);
++ if ((lane == 7) || (lane == 5) || (lane == 3) || (lane == 1)) {
++ dword &= ~(0x1ff << 16);
++ dword |= (current_total_delay[lane] & 0x1ff) << 16;
++ }
++ if ((lane == 6) || (lane == 4) || (lane == 2) || (lane == 0)) {
++ dword &= ~0x1ff;
++ dword |= current_total_delay[lane] & 0x1ff;
++ }
++ Set_NB32_index_wait(dev, index_reg, ret_reg, dword);
++ }
++}
++
++static uint32_t convert_testaddr_and_channel_to_address(struct DCTStatStruc *pDCTstat, uint32_t testaddr, uint8_t channel)
++{
++ SetUpperFSbase(testaddr);
++ testaddr <<= 8;
++
++ if((pDCTstat->Status & (1<<SB_128bitmode)) && channel ) {
++ testaddr += 8; /* second channel */
++ }
++
++ return testaddr;
++}
++
++/* DQS Receiver Enable Training
++ * Algorithm detailed in the Fam10h BKDG Rev. 3.62 section 2.8.9.9.2
++ */
+ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 Pass)
+ {
+- u8 Channel, RcvrEnDly, RcvrEnDlyRmin;
+- u8 Test0, Test1, CurrTest, CurrTestSide0, CurrTestSide1;
+- u8 CTLRMaxDelay, _2Ranks, PatternA, PatternB;
++ u8 Channel;
++ u8 _2Ranks;
+ u8 Addl_Index = 0;
+ u8 Receiver;
+ u8 _DisableDramECC = 0, _Wrap32Dis = 0, _SSE2 = 0;
+- u8 RcvrEnDlyLimit, Final_Value, MaxDelay_CH[2];
++ u8 Final_Value;
++ u16 CTLRMaxDelay;
++ u16 MaxDelay_CH[2];
+ u32 TestAddr0, TestAddr1, TestAddr0B, TestAddr1B;
+ u32 PatternBuffer[64+4]; /* FIXME: need increase 8? */
+ u32 Errors;
+@@ -127,9 +187,20 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
+ u32 cr4;
+ u32 lo, hi;
+
++ uint32_t dword;
++ uint8_t rank;
++ uint8_t lane;
++ uint16_t current_total_delay[MAX_BYTE_LANES];
++ uint16_t candidate_total_delay[8];
++ uint8_t data_test_pass_sr[2][8]; /* [rank][lane] */
++ uint8_t data_test_pass[8]; /* [lane] */
++ uint8_t data_test_pass_prev[8]; /* [lane] */
++ uint8_t window_det_toggle[8];
++ uint8_t trained[8];
++ uint64_t result_qword1;
++ uint64_t result_qword2;
++
+ u8 valid;
+- u32 tmp;
+- u8 LastTest;
+
+ print_debug_dqs("\nTrainRcvEn: Node", pDCTstat->Node_ID, 0);
+ print_debug_dqs("TrainRcvEn: Pass", Pass, 0);
+@@ -181,33 +252,103 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
+
+ Errors = 0;
+ dev = pDCTstat->dev_dct;
+- CTLRMaxDelay = 0;
+
+ for (Channel = 0; Channel < 2; Channel++) {
+ print_debug_dqs("\tTrainRcvEn51: Node ", pDCTstat->Node_ID, 1);
+ print_debug_dqs("\tTrainRcvEn51: Channel ", Channel, 1);
+ pDCTstat->Channel = Channel;
+
++ CTLRMaxDelay = 0;
+ MaxDelay_CH[Channel] = 0;
+ index_reg = 0x98 + 0x100 * Channel;
+
+ Receiver = mct_InitReceiver_D(pDCTstat, Channel);
+- /* There are four receiver pairs, loosely associated with chipselects. */
++ /* There are four receiver pairs, loosely associated with chipselects.
++ * This is essentially looping over each DIMM.
++ */
+ for (; Receiver < 8; Receiver += 2) {
+ Addl_Index = (Receiver >> 1) * 3 + 0x10;
+- LastTest = DQS_FAIL;
+-
+- /* mct_ModifyIndex_D */
+- RcvrEnDlyRmin = RcvrEnDlyLimit = 0xff;
+
+ print_debug_dqs("\t\tTrainRcvEnd52: index ", Addl_Index, 2);
+
+- if(!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver)) {
++ if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver)) {
+ continue;
+ }
+
++ /* Clear data structures */
++ for (lane = 0; lane < 8; lane++) {
++ data_test_pass_prev[lane] = 0;
++ trained[lane] = 0;
++ }
++
++ /* 2.8.9.9.2 (1, 6)
++ * Retrieve gross and fine timing fields from write DQS registers
++ */
++ read_dqs_write_timing_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg);
++
++ /* 2.8.9.9.2 (1)
++ * Program the Write Data Timing and Write ECC Timing register to
++ * the values stored in the DQS Write Timing Control register
++ * for each lane
++ */
++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
++ uint32_t wdt_reg;
++
++ /* Calculate Write Data Timing register location */
++ if ((lane == 0) || (lane == 1) || (lane == 2) || (lane == 3))
++ wdt_reg = 0x1;
++ if ((lane == 4) || (lane == 5) || (lane == 6) || (lane == 7))
++ wdt_reg = 0x2;
++ if (lane == 8)
++ wdt_reg = 0x3;
++ wdt_reg |= ((Receiver / 2) << 8);
++
++ /* Set Write Data Timing register values */
++ dword = Get_NB32_index_wait(dev, index_reg, wdt_reg);
++ if ((lane == 7) || (lane == 3)) {
++ dword &= ~(0x7f << 24);
++ dword |= (current_total_delay[lane] & 0x7f) << 24;
++ }
++ if ((lane == 6) || (lane == 2)) {
++ dword &= ~(0x7f << 16);
++ dword |= (current_total_delay[lane] & 0x7f) << 16;
++ }
++ if ((lane == 5) || (lane == 1)) {
++ dword &= ~(0x7f << 8);
++ dword |= (current_total_delay[lane] & 0x7f) << 8;
++ }
++ if ((lane == 8) || (lane == 4) || (lane == 0)) {
++ dword &= ~0x7f;
++ dword |= current_total_delay[lane] & 0x7f;
++ }
++ Set_NB32_index_wait(dev, index_reg, wdt_reg, dword);
++ }
++
++ /* 2.8.9.9.2 (2)
++ * Program the Read DQS Timing Control and the Read DQS ECC Timing Control registers
++ * to 1/2 MEMCLK for all lanes
++ */
++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
++ uint32_t rdt_reg;
++ if ((lane == 0) || (lane == 1) || (lane == 2) || (lane == 3))
++ rdt_reg = 0x5;
++ if ((lane == 4) || (lane == 5) || (lane == 6) || (lane == 7))
++ rdt_reg = 0x6;
++ if (lane == 8)
++ rdt_reg = 0x7;
++ rdt_reg |= ((Receiver / 2) << 8);
++ if (lane == 8)
++ dword = 0x0000003f;
++ else
++ dword = 0x3f3f3f3f;
++ Set_NB32_index_wait(dev, index_reg, rdt_reg, dword);
++ }
++
++ /* 2.8.9.9.2 (3)
++ * Select two test addresses for each rank present
++ */
+ TestAddr0 = mct_GetRcvrSysAddr_D(pMCTstat, pDCTstat, Channel, Receiver, &valid);
+- if(!valid) { /* Address not supported on current CS */
++ if (!valid) { /* Address not supported on current CS */
+ continue;
+ }
+
+@@ -229,171 +370,215 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
+ print_debug_dqs("\t\tTrainRcvEn53: TestAddr1 ", TestAddr1, 2);
+ print_debug_dqs("\t\tTrainRcvEn53: TestAddr1B ", TestAddr1B, 2);
+
+- /*
+- * Get starting RcvrEnDly value
++ /* 2.8.9.9.2 (4, 5)
++ * Write 1 cache line of the appropriate test pattern to each test address
+ */
+- RcvrEnDly = mct_Get_Start_RcvrEnDly_1Pass(Pass);
++ mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0, 0); /* rank 0 of DIMM, testpattern 0 */
++ mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0B, 1); /* rank 0 of DIMM, testpattern 1 */
++ if (_2Ranks) {
++ mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1, 0); /*rank 1 of DIMM, testpattern 0 */
++ mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1B, 1); /*rank 1 of DIMM, testpattern 1 */
++ }
+
+- /* mct_GetInitFlag_D*/
+- if (Pass == FirstPass) {
+- pDCTstat->DqsRcvEn_Pass = 0;
+- } else {
+- pDCTstat->DqsRcvEn_Pass=0xFF;
++#if DQS_TRAIN_DEBUG > 0
++ for (lane = 0; lane < 8; lane++) {
++ print_debug_dqs("\t\tTrainRcvEn54: lane: ", lane, 2);
++ print_debug_dqs("\t\tTrainRcvEn54: current_total_delay ", current_total_delay[lane], 2);
+ }
+- pDCTstat->DqsRcvEn_Saved = 0;
++#endif
+
++ /* 2.8.9.9.2 (6)
++ * Write gross and fine timing fields to read DQS registers
++ */
++ write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg);
++
++ /* 2.8.9.9.2 (7)
++ * Loop over all delay values up to 1 MEMCLK (0x40 delay steps) from the initial delay values
++ *
++ * FIXME
++ * It is not clear if training should be discontinued if any test failures occur in the first
++ * 1 MEMCLK window, or if it should be discontinued if no successes occur in the first 1 MEMCLK
++ * window. Therefore, loop over up to 2 MEMCLK (0x80 delay steps) to be on the safe side.
++ */
++ uint16_t current_delay_step;
+
+- while(RcvrEnDly < RcvrEnDlyLimit) { /* sweep Delay value here */
+- print_debug_dqs("\t\t\tTrainRcvEn541: RcvrEnDly ", RcvrEnDly, 3);
++ for (current_delay_step = 0; current_delay_step < 0x80; current_delay_step++) {
++ print_debug_dqs("\t\t\tTrainRcvEn541: current_delay_step ", current_delay_step, 3);
+
+- /* callback not required
+- if(mct_AdjustDelay_D(pDCTstat, RcvrEnDly))
+- goto skipDly;
++ /* 2.8.9.9.2 (7 D)
++ * Terminate if all lanes are trained
+ */
++ uint8_t all_lanes_trained = 1;
++ for (lane = 0; lane < 8; lane++)
++ if (!trained[lane])
++ all_lanes_trained = 0;
+
+- /* Odd steps get another pattern such that even
+- and odd steps alternate. The pointers to the
+- patterns will be swaped at the end of the loop
+- so that they correspond. */
+- if(RcvrEnDly & 1) {
+- PatternA = 1;
+- PatternB = 0;
+- } else {
+- /* Even step */
+- PatternA = 0;
+- PatternB = 1;
+- }
+-
+- mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0, PatternA); /* rank 0 of DIMM, testpattern 0 */
+- mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0B, PatternB); /* rank 0 of DIMM, testpattern 1 */
+- if(_2Ranks) {
+- mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1, PatternA); /*rank 1 of DIMM, testpattern 0 */
+- mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1B, PatternB); /*rank 1 of DIMM, testpattern 1 */
+- }
+-
+- mct_SetRcvrEnDly_D(pDCTstat, RcvrEnDly, 0, Channel, Receiver, dev, index_reg, Addl_Index, Pass);
+-
+- CurrTest = DQS_FAIL;
+- CurrTestSide0 = DQS_FAIL;
+- CurrTestSide1 = DQS_FAIL;
+-
+- mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0); /*cache fills */
+- Test0 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr0, Channel, PatternA, Pass);/* ROM vs cache compare */
+- proc_IOCLFLUSH_D(TestAddr0);
+- ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
+-
+- print_debug_dqs("\t\t\tTrainRcvEn542: Test0 result ", Test0, 3);
+-
+- /* != 0x00 mean pass */
+-
+- if(Test0 == DQS_PASS) {
+- mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0B); /*cache fills */
+- /* ROM vs cache compare */
+- Test1 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr0B, Channel, PatternB, Pass);
+- proc_IOCLFLUSH_D(TestAddr0B);
+- ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
+-
+- print_debug_dqs("\t\t\tTrainRcvEn543: Test1 result ", Test1, 3);
++ if (all_lanes_trained)
++ break;
+
+- if(Test1 == DQS_PASS) {
+- CurrTestSide0 = DQS_PASS;
++ /* 2.8.9.9.2 (7 A)
++ * Loop over all ranks
++ */
++ for (rank = 0; rank < (_2Ranks + 1); rank++) {
++ /* 2.8.9.9.2 (7 A a-d)
++ * Read the first test address of the current rank
++ * Store the first data beat for analysis
++ * Reset read pointer in the DRAM controller FIFO
++ * Read the second test address of the current rank
++ * Store the first data beat for analysis
++ * Reset read pointer in the DRAM controller FIFO
++ */
++ if (rank & 1) {
++ /* 2.8.9.9.2 (7 D)
++ * Invert read instructions to alternate data read order on the bus
++ */
++ proc_IOCLFLUSH_D((rank == 0)?TestAddr0B:TestAddr1B);
++ result_qword2 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, (rank == 0)?TestAddr0B:TestAddr1B, Channel));
++ write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg);
++ proc_IOCLFLUSH_D((rank == 0)?TestAddr0:TestAddr1);
++ result_qword1 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, (rank == 0)?TestAddr0:TestAddr1, Channel));
++ write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg);
++ } else {
++ proc_IOCLFLUSH_D((rank == 0)?TestAddr0:TestAddr1);
++ result_qword1 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, (rank == 0)?TestAddr0:TestAddr1, Channel));
++ write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg);
++ proc_IOCLFLUSH_D((rank == 0)?TestAddr0B:TestAddr1B);
++ result_qword2 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, (rank == 0)?TestAddr0B:TestAddr1B, Channel));
++ write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg);
+ }
+- }
+- if(_2Ranks) {
+- mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1); /*cache fills */
+- /* ROM vs cache compare */
+- Test0 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr1, Channel, PatternA, Pass);
+- proc_IOCLFLUSH_D(TestAddr1);
+- ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
+-
+- print_debug_dqs("\t\t\tTrainRcvEn544: Test0 result ", Test0, 3);
+-
+- if(Test0 == DQS_PASS) {
+- mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1B); /*cache fills */
+- /* ROM vs cache compare */
+- Test1 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr1B, Channel, PatternB, Pass);
+- proc_IOCLFLUSH_D(TestAddr1B);
+- ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
+-
+- print_debug_dqs("\t\t\tTrainRcvEn545: Test1 result ", Test1, 3);
+- if(Test1 == DQS_PASS) {
+- CurrTestSide1 = DQS_PASS;
++ /* 2.8.9.9.2 (7 A e)
++ * Compare both read patterns and flag passing ranks/lanes
++ */
++ uint8_t result_lane_byte1;
++ uint8_t result_lane_byte2;
++ for (lane = 0; lane < 8; lane++) {
++ if (trained[lane] == 1) {
++#if DQS_TRAIN_DEBUG > 0
++ print_debug_dqs("\t\t\t\t\t\t\t\t lane already trained: ", lane, 4);
++#endif
++ continue;
+ }
++
++ result_lane_byte1 = (result_qword1 >> (lane * 8)) & 0xff;
++ result_lane_byte2 = (result_qword2 >> (lane * 8)) & 0xff;
++ if ((result_lane_byte1 == 0x55) && (result_lane_byte2 == 0xaa))
++ data_test_pass_sr[rank][lane] = 1;
++ else
++ data_test_pass_sr[rank][lane] = 0;
++#if DQS_TRAIN_DEBUG > 0
++ print_debug_dqs_pair("\t\t\t\t\t\t\t\t ", 0x55, " | ", result_lane_byte1, 4);
++ print_debug_dqs_pair("\t\t\t\t\t\t\t\t ", 0xaa, " | ", result_lane_byte2, 4);
++#endif
++
+ }
+ }
+
+- if(_2Ranks) {
+- if ((CurrTestSide0 == DQS_PASS) && (CurrTestSide1 == DQS_PASS)) {
+- CurrTest = DQS_PASS;
++ /* 2.8.9.9.2 (7 B)
++ * If DIMM is dual rank, only use delays that pass testing for both ranks
++ */
++ for (lane = 0; lane < 8; lane++) {
++ if (_2Ranks) {
++ if ((data_test_pass_sr[0][lane]) && (data_test_pass_sr[1][lane]))
++ data_test_pass[lane] = 1;
++ else
++ data_test_pass[lane] = 0;
++ } else {
++ data_test_pass[lane] = data_test_pass_sr[0][lane];
+ }
+- } else if (CurrTestSide0 == DQS_PASS) {
+- CurrTest = DQS_PASS;
+ }
+
+- /* record first pass DqsRcvEn to stack */
+- valid = mct_SavePassRcvEnDly_D(pDCTstat, RcvrEnDly, Channel, Receiver, Pass);
++ /* 2.8.9.9.2 (7 E)
++ * For each lane, update the DQS receiver delay setting in support of next iteration
++ */
++ for (lane = 0; lane < 8; lane++) {
++ if (trained[lane] == 1)
++ continue;
++
++ /* 2.8.9.9.2 (7 C a)
++ * Save the total delay of the first success after a failure for later use
++ */
++ if ((data_test_pass[lane] == 1) && (data_test_pass_prev[lane] == 0)) {
++ candidate_total_delay[lane] = current_total_delay[lane];
++ window_det_toggle[lane] = 0;
++ }
+
+- /* Break(1:RevF,2:DR) or not(0) FIXME: This comment deosn't make sense */
+- if(valid == 2 || (LastTest == DQS_FAIL && valid == 1)) {
+- RcvrEnDlyRmin = RcvrEnDly;
+- break;
++ /* 2.8.9.9.2 (7 C b)
++ * If the current delay failed testing add 1/8 UI to the current delay
++ */
++ if (data_test_pass[lane] == 0)
++ current_total_delay[lane] += 0x4;
++
++ /* 2.8.9.9.2 (7 C c)
++ * If the current delay passed testing alternately add either 1/32 UI or 1/4 UI to the current delay
++ * If 1.25 UI of delay has been added with no failures, the lane is considered trained
++ */
++ if (data_test_pass[lane] == 1) {
++ /* See if lane is trained */
++ if ((current_total_delay[lane] - candidate_total_delay[lane]) >= 0x28) {
++ trained[lane] = 1;
++
++ /* Calculate and set final lane delay value
++ * The final delay is the candidate delay + 7/8 UI
++ */
++ current_total_delay[lane] = candidate_total_delay[lane] + 0x1c;
++ } else {
++ if (window_det_toggle[lane] == 0) {
++ current_total_delay[lane] += 0x1;
++ window_det_toggle[lane] = 1;
++ } else {
++ current_total_delay[lane] += 0x8;
++ window_det_toggle[lane] = 0;
++ }
++ }
++ }
+ }
+
+- LastTest = CurrTest;
+-
+- /* swap the rank 0 pointers */
+- tmp = TestAddr0;
+- TestAddr0 = TestAddr0B;
+- TestAddr0B = tmp;
+-
+- /* swap the rank 1 pointers */
+- tmp = TestAddr1;
+- TestAddr1 = TestAddr1B;
+- TestAddr1B = tmp;
+-
+- print_debug_dqs("\t\t\tTrainRcvEn56: RcvrEnDly ", RcvrEnDly, 3);
++ /* Update delays in hardware */
++ write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg);
+
+- RcvrEnDly++;
+-
+- } /* while RcvrEnDly */
+-
+- print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDly ", RcvrEnDly, 2);
+- print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDlyRmin ", RcvrEnDlyRmin, 3);
+- print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDlyLimit ", RcvrEnDlyLimit, 3);
+- if(RcvrEnDlyRmin == RcvrEnDlyLimit) {
+- /* no passing window */
+- pDCTstat->ErrStatus |= 1 << SB_NORCVREN;
+- Errors |= 1 << SB_NORCVREN;
+- pDCTstat->ErrCode = SC_FatalErr;
++ /* Save previous results for comparison in the next iteration */
++ for (lane = 0; lane < 8; lane++)
++ data_test_pass_prev[lane] = data_test_pass[lane];
+ }
+
+- if(RcvrEnDly > (RcvrEnDlyLimit - 1)) {
+- /* passing window too narrow, too far delayed*/
+- pDCTstat->ErrStatus |= 1 << SB_SmallRCVR;
+- Errors |= 1 << SB_SmallRCVR;
+- pDCTstat->ErrCode = SC_FatalErr;
+- RcvrEnDly = RcvrEnDlyLimit - 1;
+- pDCTstat->CSTrainFail |= 1 << Receiver;
+- pDCTstat->DimmTrainFail |= 1 << (Receiver + Channel);
+- }
+-
+- /* CHB_D0_B0_RCVRDLY set in mct_Average_RcvrEnDly_Pass */
+- mct_Average_RcvrEnDly_Pass(pDCTstat, RcvrEnDly, RcvrEnDlyLimit, Channel, Receiver, Pass);
+-
+- mct_SetFinalRcvrEnDly_D(pDCTstat, RcvrEnDly, Final_Value, Channel, Receiver, dev, index_reg, Addl_Index, Pass);
++#if DQS_TRAIN_DEBUG > 0
++ for (lane = 0; lane < 8; lane++)
++ print_debug_dqs_pair("\t\tTrainRcvEn55: Lane ", lane, " current_total_delay ", current_total_delay[lane], 2);
++#endif
+
+- if(pDCTstat->ErrStatus & (1 << SB_SmallRCVR)) {
+- Errors |= 1 << SB_SmallRCVR;
+- }
++ /* Find highest delay value and save for later use */
++ for (lane = 0; lane < 8; lane++)
++ if (current_total_delay[lane] > CTLRMaxDelay)
++ CTLRMaxDelay = current_total_delay[lane];
+
+- RcvrEnDly += Pass1MemClkDly;
+- if(RcvrEnDly > CTLRMaxDelay) {
+- CTLRMaxDelay = RcvrEnDly;
++ /* See if any lanes failed training, and set error flags appropriately
++ * For all trained lanes, save delay values for later use
++ */
++ for (lane = 0; lane < 8; lane++) {
++ if (trained[lane]) {
++ pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver >> 1][lane] = current_total_delay[lane];
++ } else {
++ printk(BIOS_WARNING, "TrainRcvrEn: WARNING: Lane %d of receiver %d on channel %d failed training!\n", lane, Receiver, Channel);
++
++ /* Set error flags */
++ pDCTstat->ErrStatus |= 1 << SB_NORCVREN;
++ Errors |= 1 << SB_NORCVREN;
++ pDCTstat->ErrCode = SC_FatalErr;
++ pDCTstat->CSTrainFail |= 1 << Receiver;
++ pDCTstat->DimmTrainFail |= 1 << (Receiver + Channel);
++ }
+ }
+
+- } /* while Receiver */
++ /* 2.8.9.9.2 (8)
++ * Flush the receiver FIFO
++ * Write one full cache line of non-0x55/0xaa data to one of the test addresses, then read it back to flush the FIFO
++ */
++
++ WriteLNTestPattern(TestAddr0 << 8, (uint8_t *)TestPattern2_D, 1);
++ mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0);
++ }
+ MaxDelay_CH[Channel] = CTLRMaxDelay;
+- } /* for Channel */
++ }
+
+ CTLRMaxDelay = MaxDelay_CH[0];
+ if (MaxDelay_CH[1] > CTLRMaxDelay)
+@@ -428,31 +613,31 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
+
+ #if DQS_TRAIN_DEBUG > 0
+ {
+- u8 Channel;
++ u8 ChannelDTD;
+ printk(BIOS_DEBUG, "TrainRcvrEn: CH_MaxRdLat:\n");
+- for(Channel = 0; Channel<2; Channel++) {
++ for(ChannelDTD = 0; ChannelDTD<2; ChannelDTD++) {
+ printk(BIOS_DEBUG, "Channel:%x: %x\n",
+- Channel, pDCTstat->CH_MaxRdLat[Channel]);
++ ChannelDTD, pDCTstat->CH_MaxRdLat[ChannelDTD]);
+ }
+ }
+ #endif
+
+ #if DQS_TRAIN_DEBUG > 0
+ {
+- u8 val;
+- u8 Channel, Receiver;
++ u16 valDTD;
++ u8 ChannelDTD, ReceiverDTD;
+ u8 i;
+- u8 *p;
++ u16 *p;
+
+ printk(BIOS_DEBUG, "TrainRcvrEn: CH_D_B_RCVRDLY:\n");
+- for(Channel = 0; Channel < 2; Channel++) {
+- printk(BIOS_DEBUG, "Channel:%x\n", Channel);
+- for(Receiver = 0; Receiver<8; Receiver+=2) {
+- printk(BIOS_DEBUG, "\t\tReceiver:%x:", Receiver);
+- p = pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver>>1];
++ for(ChannelDTD = 0; ChannelDTD < 2; ChannelDTD++) {
++ printk(BIOS_DEBUG, "Channel:%x\n", ChannelDTD);
++ for(ReceiverDTD = 0; ReceiverDTD<8; ReceiverDTD+=2) {
++ printk(BIOS_DEBUG, "\t\tReceiver:%x:", ReceiverDTD);
++ p = pDCTstat->CH_D_B_RCVRDLY[ChannelDTD][ReceiverDTD>>1];
+ for (i=0;i<8; i++) {
+- val = p[i];
+- printk(BIOS_DEBUG, "%x ", val);
++ valDTD = p[i];
++ printk(BIOS_DEBUG, " %03x", valDTD);
+ }
+ printk(BIOS_DEBUG, "\n");
+ }
+@@ -475,15 +660,6 @@ u8 mct_InitReceiver_D(struct DCTStatStruc *pDCTstat, u8 dct)
+ }
+ }
+
+-static void mct_SetFinalRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly, u8 where, u8 Channel, u8 Receiver, u32 dev, u32 index_reg, u8 Addl_Index, u8 Pass/*, u8 *p*/)
+-{
+- /*
+- * Program final DqsRcvEnDly to additional index for DQS receiver
+- * enabled delay
+- */
+- mct_SetRcvrEnDly_D(pDCTstat, RcvrEnDly, where, Channel, Receiver, dev, index_reg, Addl_Index, Pass);
+-}
+-
+ static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat)
+ {
+ u8 ch_end, ch;
+@@ -514,17 +690,20 @@ static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat)
+ * Function only used once so it was inlined.
+ */
+
+-void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly,
++/* Set F2x[1, 0]9C_x[2B:10] DRAM DQS Receiver Enable Timing Control Registers
++ * See BKDG Rev. 3.62 page 268 for more information
++ */
++void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u16 RcvrEnDly,
+ u8 FinalValue, u8 Channel, u8 Receiver, u32 dev,
+ u32 index_reg, u8 Addl_Index, u8 Pass)
+ {
+ u32 index;
+ u8 i;
+- u8 *p;
++ u16 *p;
+ u32 val;
+
+- if(RcvrEnDly == 0xFE) {
+- /*set the boudary flag */
++ if(RcvrEnDly == 0x1fe) {
++ /*set the boundary flag */
+ pDCTstat->Status |= 1 << SB_DQSRcvLimit;
+ }
+
+@@ -543,27 +722,57 @@ void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly,
+ val = Get_NB32_index_wait(dev, index_reg, index);
+ if(i & 1) {
+ /* odd byte lane */
+- val &= ~(0xFF << 16);
+- val |= (RcvrEnDly << 16);
++ val &= ~(0x1ff << 16);
++ val |= ((RcvrEnDly & 0x1ff) << 16);
+ } else {
+ /* even byte lane */
+- val &= ~0xFF;
+- val |= RcvrEnDly;
++ val &= ~0x1ff;
++ val |= (RcvrEnDly & 0x1ff);
+ }
+ Set_NB32_index_wait(dev, index_reg, index, val);
+ }
+
+ }
+
+-static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u8 DQSRcvEnDly)
++/* Calculate MaxRdLatency
++ * Algorithm detailed in the Fam10h BKDG Rev. 3.62 section 2.8.9.9.5
++ */
++static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u16 DQSRcvEnDly)
+ {
+ u32 dev;
+ u32 reg;
+- u16 SubTotal;
++ u32 SubTotal;
+ u32 index_reg;
+ u32 reg_off;
+ u32 val;
+- u32 valx;
++
++ uint8_t cpu_val_n;
++ uint8_t cpu_val_p;
++
++ u16 freq_tab[] = {400, 533, 667, 800};
++
++ /* Set up processor-dependent values */
++ if (pDCTstat->LogicalCPUID & AMD_DR_Dx) {
++ /* Revision D and above */
++ cpu_val_n = 4;
++ cpu_val_p = 29;
++ } else if (pDCTstat->LogicalCPUID & AMD_DR_Cx) {
++ /* Revision C */
++ uint8_t package_type = mctGet_NVbits(NV_PACK_TYPE);
++ if ((package_type == PT_L1) /* Socket F (1207) */
++ || (package_type == PT_M2) /* Socket AM3 */
++ || (package_type == PT_S1)) { /* Socket S1g<x> */
++ cpu_val_n = 10;
++ cpu_val_p = 11;
++ } else {
++ cpu_val_n = 4;
++ cpu_val_p = 29;
++ }
++ } else {
++ /* Revision B and below */
++ cpu_val_n = 10;
++ cpu_val_p = 11;
++ }
+
+ if(pDCTstat->GangedMode)
+ Channel = 0;
+@@ -598,49 +807,32 @@ static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u8 DQ
+ val = Get_NB32(dev, 0x78 + reg_off);
+ SubTotal += 8 - (val & 0x0f);
+
+- /* Convert bits 7-5 (also referred to as the course delay) of
++ /* Convert bits 7-5 (also referred to as the coarse delay) of
+ * the current (or worst case) DQS receiver enable delay to
+ * 1/2 MEMCLKs units, rounding up, and add this to the sub-total.
+ */
+- SubTotal += DQSRcvEnDly >> 5; /*BOZO-no rounding up */
++ SubTotal += DQSRcvEnDly >> 5; /* Retrieve gross delay portion of value */
+
+- /* Add 5.5 to the sub-total. 5.5 represents part of the
++ /* Add "P" to the sub-total. "P" represents part of the
+ * processor specific constant delay value in the DRAM
+ * clock domain.
+ */
+ SubTotal <<= 1; /*scale 1/2 MemClk to 1/4 MemClk */
+- SubTotal += 11; /*add 5.5 1/2MemClk */
++ SubTotal += cpu_val_p; /*add "P" 1/2MemClk */
++ SubTotal >>= 1; /*scale 1/4 MemClk back to 1/2 MemClk */
+
+ /* Convert the sub-total (in 1/2 MEMCLKs) to northbridge
+- * clocks (NCLKs) as follows (assuming DDR400 and assuming
+- * that no P-state or link speed changes have occurred).
++ * clocks (NCLKs)
+ */
++ SubTotal *= 200 * ((Get_NB32(pDCTstat->dev_nbmisc, 0xd4) & 0x1f) + 4);
++ SubTotal /= freq_tab[((Get_NB32(pDCTstat->dev_dct, 0x94 + reg_off) & 0x7) - 3)];
++ SubTotal = (SubTotal + (2 - 1)) / 2; /* Round up */
+
+- /* New formula:
+- * SubTotal *= 3*(Fn2xD4[NBFid]+4)/(3+Fn2x94[MemClkFreq])/2 */
+- val = Get_NB32(dev, 0x94 + reg_off);
+-
+- /* SubTotal div 4 to scale 1/4 MemClk back to MemClk */
+- val &= 7;
+- if (val >= 3) {
+- val <<= 1;
+- } else
+- val += 3;
+- valx = val << 2;
+-
+- val = Get_NB32(pDCTstat->dev_nbmisc, 0xD4);
+- SubTotal *= ((val & 0x1f) + 4 ) * 3;
+-
+- SubTotal /= valx;
+- if (SubTotal % valx) { /* round up */
+- SubTotal++;
+- }
+-
+- /* Add 5 NCLKs to the sub-total. 5 represents part of the
++ /* Add "N" NCLKs to the sub-total. "N" represents part of the
+ * processor specific constant value in the northbridge
+ * clock domain.
+ */
+- SubTotal += 5;
++ SubTotal += (cpu_val_n) / 2;
+
+ pDCTstat->CH_MaxRdLat[Channel] = SubTotal;
+ if(pDCTstat->GangedMode) {
+@@ -659,143 +851,6 @@ static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u8 DQ
+ Set_NB32(dev, reg, val);
+ }
+
+-static u8 mct_SavePassRcvEnDly_D(struct DCTStatStruc *pDCTstat,
+- u8 rcvrEnDly, u8 Channel,
+- u8 receiver, u8 Pass)
+-{
+- u8 i;
+- u8 mask_Saved, mask_Pass;
+- u8 *p;
+-
+- /* calculate dimm offset
+- * not needed for CH_D_B_RCVRDLY array
+- */
+-
+- /* cmp if there has new DqsRcvEnDly to be recorded */
+- mask_Pass = pDCTstat->DqsRcvEn_Pass;
+-
+- if(Pass == SecondPass) {
+- mask_Pass = ~mask_Pass;
+- }
+-
+- mask_Saved = pDCTstat->DqsRcvEn_Saved;
+- if(mask_Pass != mask_Saved) {
+-
+- /* find desired stack offset according to channel/dimm/byte */
+- if(Pass == SecondPass) {
+- /* FIXME: SecondPass is never used for Barcelona p = pDCTstat->CH_D_B_RCVRDLY_1[Channel][receiver>>1]; */
+- p = 0; /* Keep the compiler happy. */
+- } else {
+- mask_Saved &= mask_Pass;
+- p = pDCTstat->CH_D_B_RCVRDLY[Channel][receiver>>1];
+- }
+- for(i=0; i < 8; i++) {
+- /* cmp per byte lane */
+- if(mask_Pass & (1 << i)) {
+- if(!(mask_Saved & (1 << i))) {
+- /* save RcvEnDly to stack, according to
+- the related Dimm/byte lane */
+- p[i] = (u8)rcvrEnDly;
+- mask_Saved |= 1 << i;
+- }
+- }
+- }
+- pDCTstat->DqsRcvEn_Saved = mask_Saved;
+- }
+- return mct_SaveRcvEnDly_D_1Pass(pDCTstat, Pass);
+-}
+-
+-static u8 mct_CompareTestPatternQW0_D(struct MCTStatStruc *pMCTstat,
+- struct DCTStatStruc *pDCTstat,
+- u32 addr, u8 channel,
+- u8 pattern, u8 Pass)
+-{
+- /* Compare only the first beat of data. Since target addrs are cache
+- * line aligned, the Channel parameter is used to determine which
+- * cache QW to compare.
+- */
+-
+- u8 *test_buf;
+- u8 i;
+- u8 result;
+- u8 value;
+-
+- if(Pass == FirstPass) {
+- if(pattern==1) {
+- test_buf = (u8 *)TestPattern1_D;
+- } else {
+- test_buf = (u8 *)TestPattern0_D;
+- }
+- } else { /* Second Pass */
+- test_buf = (u8 *)TestPattern2_D;
+- }
+-
+- SetUpperFSbase(addr);
+- addr <<= 8;
+-
+- if((pDCTstat->Status & (1<<SB_128bitmode)) && channel ) {
+- addr += 8; /* second channel */
+- test_buf += 8;
+- }
+-
+- print_debug_dqs_pair("\t\t\t\t\t\t test_buf = ", (u32)test_buf, " | addr_lo = ", addr, 4);
+- for (i=0; i<8; i++, addr ++) {
+- value = read32_fs(addr);
+- print_debug_dqs_pair("\t\t\t\t\t\t\t\t ", test_buf[i], " | ", value, 4);
+-
+- if (value == test_buf[i]) {
+- pDCTstat->DqsRcvEn_Pass |= (1<<i);
+- } else {
+- pDCTstat->DqsRcvEn_Pass &= ~(1<<i);
+- }
+- }
+-
+- result = DQS_FAIL;
+-
+- if (Pass == FirstPass) {
+- /* if first pass, at least one byte lane pass
+- * ,then DQS_PASS=1 and will set to related reg.
+- */
+- if(pDCTstat->DqsRcvEn_Pass != 0) {
+- result = DQS_PASS;
+- } else {
+- result = DQS_FAIL;
+- }
+-
+- } else {
+- /* if second pass, at least one byte lane fail
+- * ,then DQS_FAIL=1 and will set to related reg.
+- */
+- if(pDCTstat->DqsRcvEn_Pass != 0xFF) {
+- result = DQS_FAIL;
+- } else {
+- result = DQS_PASS;
+- }
+- }
+-
+- /* if second pass, we can't find the fail until FFh,
+- * then let it fail to save the final delay
+- */
+- if((Pass == SecondPass) && (pDCTstat->Status & (1 << SB_DQSRcvLimit))) {
+- result = DQS_FAIL;
+- pDCTstat->DqsRcvEn_Pass = 0;
+- }
+-
+- /* second pass needs to be inverted
+- * FIXME? this could be inverted in the above code to start with...
+- */
+- if(Pass == SecondPass) {
+- if (result == DQS_PASS) {
+- result = DQS_FAIL;
+- } else if (result == DQS_FAIL) { /* FIXME: doesn't need to be else if */
+- result = DQS_PASS;
+- }
+- }
+-
+-
+- return result;
+-}
+-
+ static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat)
+ {
+@@ -854,7 +909,7 @@ void SetEccDQSRcvrEn_D(struct DCTStatStruc *pDCTstat, u8 Channel)
+ u32 index_reg;
+ u32 index;
+ u8 ChipSel;
+- u8 *p;
++ u16 *p;
+ u32 val;
+
+ dev = pDCTstat->dev_dct;
+@@ -884,7 +939,7 @@ static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
+
+ for (ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) {
+ if(mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, ChipSel)) {
+- u8 *p;
++ u16 *p;
+ p = pDCTstat->CH_D_B_RCVRDLY[Channel][ChipSel>>1];
+
+ /* DQS Delay Value of Data Bytelane
+@@ -920,6 +975,10 @@ static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
+ SetEccDQSRcvrEn_D(pDCTstat, Channel);
+ }
+
++/* 2.8.9.9.4
++ * ECC Byte Lane Training
++ * DQS Receiver Enable Delay
++ */
+ void mctSetEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstatA)
+ {
+@@ -1017,7 +1076,9 @@ static void fenceDynTraining_D(struct MCTStatStruc *pMCTstat,
+ avRecValue -= 3;
+ else
+ */
+- if (pDCTstat->LogicalCPUID & AMD_DR_Cx)
++ if (pDCTstat->LogicalCPUID & AMD_DR_Dx)
++ avRecValue -= 8;
++ else if (pDCTstat->LogicalCPUID & AMD_DR_Cx)
+ avRecValue -= 8;
+ else if (pDCTstat->LogicalCPUID & AMD_DR_Bx)
+ avRecValue -= 8;
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc1p.c b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc1p.c
+index c009756..f01e011 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc1p.c
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc1p.c
+@@ -2,6 +2,7 @@
+ * This file is part of the coreboot project.
+ *
+ * Copyright (C) 2010 Advanced Micro Devices, Inc.
++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+@@ -36,17 +37,12 @@ u32 SetupDqsPattern_1PassB(u8 pass)
+ return (u32) TestPattern0_D;
+ }
+
+-u8 mct_Get_Start_RcvrEnDly_1Pass(u8 pass)
+-{
+- return 0;
+-}
+-
+-static u8 mct_Average_RcvrEnDly_1Pass(struct DCTStatStruc *pDCTstat, u8 Channel, u8 Receiver,
++static u16 mct_Average_RcvrEnDly_1Pass(struct DCTStatStruc *pDCTstat, u8 Channel, u8 Receiver,
+ u8 Pass)
+ {
+- u8 i, MaxValue;
+- u8 *p;
+- u8 val;
++ u16 i, MaxValue;
++ u16 *p;
++ u16 val;
+
+ MaxValue = 0;
+ p = pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver >> 1];
+@@ -76,8 +72,8 @@ u8 mct_SaveRcvEnDly_D_1Pass(struct DCTStatStruc *pDCTstat, u8 pass)
+ return ret;
+ }
+
+-u8 mct_Average_RcvrEnDly_Pass(struct DCTStatStruc *pDCTstat,
+- u8 RcvrEnDly, u8 RcvrEnDlyLimit,
++u16 mct_Average_RcvrEnDly_Pass(struct DCTStatStruc *pDCTstat,
++ u16 RcvrEnDly, u16 RcvrEnDlyLimit,
+ u8 Channel, u8 Receiver, u8 Pass)
+
+ {
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc2p.c b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc2p.c
+index b01889d..796febc 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc2p.c
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc2p.c
+@@ -2,6 +2,7 @@
+ * This file is part of the coreboot project.
+ *
+ * Copyright (C) 2010 Advanced Micro Devices, Inc.
++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+@@ -74,15 +75,15 @@ u8 mct_Get_Start_RcvrEnDly_Pass(struct DCTStatStruc *pDCTstat,
+ return RcvrEnDly;
+ }
+
+-u8 mct_Average_RcvrEnDly_Pass(struct DCTStatStruc *pDCTstat,
+- u8 RcvrEnDly, u8 RcvrEnDlyLimit,
++u16 mct_Average_RcvrEnDly_Pass(struct DCTStatStruc *pDCTstat,
++ u16 RcvrEnDly, u16 RcvrEnDlyLimit,
+ u8 Channel, u8 Receiver, u8 Pass)
+ {
+ u8 i;
+- u8 *p;
+- u8 *p_1;
+- u8 val;
+- u8 val_1;
++ u16 *p;
++ u16 *p_1;
++ u16 val;
++ u16 val_1;
+ u8 valid = 1;
+ u8 bn;
+
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mcttmrl.c b/src/northbridge/amd/amdmct/mct_ddr3/mcttmrl.c
+index ea5c8c7..920f514 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mcttmrl.c
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mcttmrl.c
+@@ -2,6 +2,7 @@
+ * This file is part of the coreboot project.
+ *
+ * Copyright (C) 2010 Advanced Micro Devices, Inc.
++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+@@ -191,10 +192,10 @@ static void maxRdLatencyTrain_D(struct MCTStatStruc *pMCTstat,
+
+ #if DQS_TRAIN_DEBUG > 0
+ {
+- u8 Channel;
++ u8 ChannelDTD;
+ printk(BIOS_DEBUG, "maxRdLatencyTrain: CH_MaxRdLat:\n");
+- for(Channel = 0; Channel<2; Channel++) {
+- printk(BIOS_DEBUG, "Channel: %02x: %02x\n", Channel, pDCTstat->CH_MaxRdLat[Channel]);
++ for(ChannelDTD = 0; ChannelDTD<2; ChannelDTD++) {
++ printk(BIOS_DEBUG, "Channel: %02x: %02x\n", ChannelDTD, pDCTstat->CH_MaxRdLat[ChannelDTD]);
+ }
+ }
+ #endif
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c b/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c
+index cdeae49..1c3e322 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c
+@@ -2,6 +2,7 @@
+ * This file is part of the coreboot project.
+ *
+ * Copyright (C) 2010 Advanced Micro Devices, Inc.
++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+@@ -58,9 +59,9 @@ void PrepareC_DCT(struct MCTStatStruc *pMCTstat,
+ pDCTstat->C_DCTPtr[dct]->LogicalCPUID = pDCTstat->LogicalCPUID;
+
+ for (dimm = 0; dimm < MAX_DIMMS; dimm++) {
+- if (DimmValid & (1 << dimm))
++ if (DimmValid & (1 << (dimm << 1)))
+ pDCTstat->C_DCTPtr[dct]->DimmPresent[dimm] = 1;
+- if (Dimmx8Present & (1 << dimm))
++ if (Dimmx8Present & (1 << (dimm << 1)))
+ pDCTstat->C_DCTPtr[dct]->DimmX8Present[dimm] = 1;
+ }
+
+@@ -88,9 +89,9 @@ void PrepareC_DCT(struct MCTStatStruc *pMCTstat,
+ u8 DimmRanks;
+ if (DimmValid & (1 << (dimm << 1))) {
+ DimmRanks = 1;
+- if (pDCTstat->DimmDRPresent & (1 << (dimm+dct)))
++ if (pDCTstat->DimmDRPresent & (1 << ((dimm << 1) + dct)))
+ DimmRanks = 2;
+- else if (pDCTstat->DimmQRPresent & (1 << (dimm+dct)))
++ else if (pDCTstat->DimmQRPresent & (1 << ((dimm << 1) + dct)))
+ DimmRanks = 4;
+ } else
+ DimmRanks = 0;
+@@ -249,35 +250,6 @@ static void ChangeMemClk(struct MCTStatStruc *pMCTstat,
+ }
+ }
+
+-/* Multiply the previously saved delay values in Pass 1, step #5 by
+- (target frequency)/400 to find the gross and fine delay initialization
+- values at the target frequency.
+- */
+-void MultiplyDelay(struct MCTStatStruc *pMCTstat,
+- struct DCTStatStruc *pDCTstat, u8 dct)
+-{
+- u16 index;
+- u8 Multiplier;
+- u8 gross, fine;
+- u16 total;
+-
+- Multiplier = pDCTstat->TargetFreq;
+-
+- for (index=0; index < MAX_BYTE_LANES*MAX_LDIMMS; index ++) {
+- gross = pDCTstat->C_DCTPtr[dct]->WLGrossDelay[index];
+- fine = pDCTstat->C_DCTPtr[dct]->WLFineDelay[index];
+-
+- total = gross << 5 | fine;
+- total *= Multiplier;
+- if (total % 3)
+- total = total / 3 + 1;
+- else
+- total = total / 3;
+- pDCTstat->C_DCTPtr[dct]->WLGrossDelay[index] = (total & 0xFF) >> 5;
+- pDCTstat->C_DCTPtr[dct]->WLFineDelay[index] = total & 0x1F;
+- }
+-}
+-
+ /*
+ * the DRAM controller to bring the DRAMs out of self refresh mode.
+ */
+@@ -352,9 +324,9 @@ void SetTargetFreq(struct MCTStatStruc *pMCTstat,
+
+ if (!DCT1Present)
+ pDCTstat->CSPresent = pDCTstat->CSPresent_DCT[0];
+- else if (pDCTstat->GangedMode) {
++ else if (pDCTstat->GangedMode)
+ pDCTstat->CSPresent = 0;
+- } else
++ else
+ pDCTstat->CSPresent = pDCTstat->CSPresent_DCT[1];
+
+ FreqChgCtrlWrd(pMCTstat, pDCTstat);
+diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c
+index 212a348..c76476b 100644
+--- a/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c
++++ b/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c
+@@ -2,6 +2,7 @@
+ * This file is part of the coreboot project.
+ *
+ * Copyright (C) 2010 Advanced Micro Devices, Inc.
++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+@@ -235,6 +236,65 @@ u32 swapBankBits(sDCTStruct *pDCTData, u32 MRSValue)
+ return MRSValue;
+ }
+
++static uint16_t unbuffered_dimm_nominal_termination_emrs(uint8_t number_of_dimms, uint8_t frequency_index, uint8_t rank_count, uint8_t rank)
++{
++ uint16_t term;
++
++ /* FIXME
++ * Mainboards need to be able to specify the maximum number of DIMMs installable per channel
++ * For now assume a maximum of 2 DIMMs per channel can be installed
++ */
++ uint8_t MaxDimmsInstallable = 2;
++
++ if (number_of_dimms == 1) {
++ if (MaxDimmsInstallable < 3) {
++ term = 0x04; /* Rtt_Nom=RZQ/4=60 Ohm */
++ } else {
++ if (rank_count == 1) {
++ term = 0x04; /* Rtt_Nom=RZQ/4=60 Ohm */
++ } else {
++ if (rank == 0)
++ term = 0x04; /* Rtt_Nom=RZQ/4=60 Ohm */
++ else
++ term = 0x00; /* Rtt_Nom=OFF */
++ }
++ }
++ } else {
++ if (frequency_index < 5)
++ term = 0x0044; /* Rtt_Nom=RZQ/6=40 Ohm */
++ else
++ term = 0x0204; /* Rtt_Nom=RZQ/8=30 Ohm */
++ }
++
++ return term;
++}
++
++static uint16_t unbuffered_dimm_dynamic_termination_emrs(uint8_t number_of_dimms, uint8_t frequency_index, uint8_t rank_count, uint8_t rank)
++{
++ uint16_t term;
++
++ /* FIXME
++ * Mainboards need to be able to specify the maximum number of DIMMs installable per channel
++ * For now assume a maximum of 2 DIMMs per channel can be installed
++ */
++ uint8_t MaxDimmsInstallable = 2;
++
++ if (number_of_dimms == 1) {
++ if (MaxDimmsInstallable < 3) {
++ term = 0x00; /* Rtt_WR=off */
++ } else {
++ if (rank_count == 1)
++ term = 0x00; /* Rtt_WR=off */
++ else
++ term = 0x200; /* Rtt_WR=RZQ/4=60 Ohm */
++ }
++ } else {
++ term = 0x400; /* Rtt_WR=RZQ/2=120 Ohm */
++ }
++
++ return term;
++}
++
+ /*-----------------------------------------------------------------------------
+ * void prepareDimms(sMCTStruct *pMCTData, sDCTStruct *DCTData, u8 Dimm, BOOL WL)
+ *
+@@ -295,48 +355,23 @@ void prepareDimms(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm, BOOL wl)
+ if (pDCTData->Status[DCT_STATUS_REGISTERED]) {
+ tempW1 = RttNomTargetRegDimm(pMCTData, pDCTData, dimm, wl, MemClkFreq, rank);
+ } else {
+- if (wl)
+- {
+- if (pDCTData->MaxDimmsInstalled == 1)
+- {
+- if ((pDCTData->DimmRanks[dimm] == 2) && (rank == 0))
+- {
+- tempW1 = 0x00; /* Rtt_Nom=OFF */
+- }
++ if (wl) {
++ if (rank == 0) {
++ /* Get Rtt_WR for the current DIMM and rank */
++ uint16_t dynamic_term = unbuffered_dimm_dynamic_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[currDimm], rank);
++
++ /* Convert dynamic termination code to corresponding nominal termination code */
++ if (dynamic_term == 0x200)
++ tempW1 = 0x04;
++ else if (dynamic_term == 0x400)
++ tempW1 = 0x40;
+ else
+- {
+- tempW1 = 0x04; /* Rtt_Nom=RZQ/4=60 Ohm */
+- }
+- }
+- else /* 2 Dimms or more per channel */
+- {
+- if ((pDCTData->DimmRanks[dimm] == 2) && (rank == 1))
+- {
+- tempW1 = 0x00; /* Rtt_Nom=OFF */
+- }
+- else
+- {
+- if (MemClkFreq == 6) {
+- tempW1 = 0x04; /* Rtt_Nom=RZQ/4=60 Ohm */
+- } else {
+- tempW1 = 0x40;/* Rtt_Nom=RZQ/2=120 Ohm */
+- }
+- }
+- }
+- }
+- else { /* 1 or 4 Dimms per channel */
+- if ((pDCTData->MaxDimmsInstalled == 1) || (pDCTData->MaxDimmsInstalled == 4))
+- {
+- tempW1 = 0x04; /* Rtt_Nom=RZQ/4=60 Ohm */
+- }
+- else /* 2 or 3 Dimms per channel */
+- {
+- if (MemClkFreq < 5) {
+- tempW1 = 0x0044; /* Rtt_Nom=RZQ/6=40 Ohm */
+- } else {
+- tempW1 = 0x0204; /* Rtt_Nom=RZQ/8=30 Ohm */
+- }
++ tempW1 = 0x0;
++ } else {
++ tempW1 = unbuffered_dimm_nominal_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[currDimm], rank);
+ }
++ } else {
++ tempW1 = unbuffered_dimm_nominal_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[currDimm], rank);
+ }
+ }
+ tempW=tempW|tempW1;
+@@ -353,20 +388,22 @@ void prepareDimms(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm, BOOL wl)
+ else
+ {
+ /* Disable the output drivers of all other ranks for
+- * the target DIMM. */
++ * the target DIMM.
++ */
+ tempW = bitTestSet(tempW1, Qoff);
+ }
+ }
+- /* program MrsAddress[5,1]=output driver impedance control (DIC):
+- * based on F2x[1,0]84[DrvImpCtrl] */
++ /* Program MrsAddress[5,1]=output driver impedance control (DIC):
++ * based on F2x[1,0]84[DrvImpCtrl]
++ */
+ tempW1 = get_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId,
+ FUN_DCT, DRAM_MRS_REGISTER, DrvImpCtrlStart, DrvImpCtrlEnd);
+- if (bitTest(tempW1,1))
+- {tempW = bitTestSet(tempW, 5);}
+- if (bitTest(tempW1,0))
+- {tempW = bitTestSet(tempW, 1);}
++ if (bitTest(tempW1, 1))
++ tempW = bitTestSet(tempW, 5);
++ if (bitTest(tempW1, 0))
++ tempW = bitTestSet(tempW, 1);
+
+- tempW = swapAddrBits_wl(pDCTData,tempW);
++ tempW = swapAddrBits_wl(pDCTData, tempW);
+
+ set_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, FUN_DCT,
+ DRAM_INIT, MrsAddressStart, MrsAddressEnd, tempW);
+@@ -404,29 +441,10 @@ void prepareDimms(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm, BOOL wl)
+ if ((pDCTData->LogicalCPUID & AMD_DR_Bx) && (pDCTData->Status[DCT_STATUS_REGISTERED]))
+ tempW+=0x8;
+ /* determine Rtt_WR for WL & Normal mode */
+- if (pDCTData->Status[DCT_STATUS_REGISTERED]) {
++ if (pDCTData->Status[DCT_STATUS_REGISTERED])
+ tempW1 = RttWrRegDimm(pMCTData, pDCTData, dimm, wl, MemClkFreq, rank);
+- } else {
+- if (wl)
+- {
+- tempW1 = 0x00; /* Rtt_WR=off */
+- }
+- else
+- {
+- if (pDCTData->MaxDimmsInstalled == 1)
+- {
+- tempW1 = 0x00; /* Rtt_WR=off */
+- }
+- else
+- {
+- if (MemClkFreq == 6) {
+- tempW1 = 0x200; /* Rtt_WR=RZQ/4=60 Ohm */
+- } else {
+- tempW1 = 0x400; /* Rtt_WR=RZQ/2 */
+- }
+- }
+- }
+- }
++ else
++ tempW1 = unbuffered_dimm_dynamic_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[dimm], rank);
+ tempW=tempW|tempW1;
+ tempW = swapAddrBits_wl(pDCTData,tempW);
+ set_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, FUN_DCT,
+@@ -483,38 +501,10 @@ void prepareDimms(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm, BOOL wl)
+ }
+
+ /* determine Rtt_Nom for WL & Normal mode */
+- if (pDCTData->Status[DCT_STATUS_REGISTERED]) {
++ if (pDCTData->Status[DCT_STATUS_REGISTERED])
+ tempW1 = RttNomNonTargetRegDimm(pMCTData, pDCTData, currDimm, wl, MemClkFreq, rank);
+- } else {
+- if (wl)
+- {
+- if ((pDCTData->DimmRanks[currDimm] == 2) && (rank == 1))
+- {
+- tempW1 = 0x00; /* Rtt_Nom=OFF */
+- }
+- else
+- {
+- if (MemClkFreq < 5) {
+- tempW1 = 0x0044;/* Rtt_Nom=RZQ/6=40 Ohm */
+- } else {
+- tempW1 = 0x0204;/* Rtt_Nom=RZQ/8=30 Ohm */
+- }
+- }
+- }
+- else { /* 1 or 4 Dimms per channel */
+- if (pDCTData->MaxDimmsInstalled == 4)
+- {
+- tempW1 = 0x04; /* Rtt_Nom=RZQ/4=60 Ohm */
+- }
+- else { /* 2 or 3 Dimms per channel */
+- if (MemClkFreq < 5) {
+- tempW1 = 0x0044; /* Rtt_Nom=RZQ/6=40 Ohm */
+- } else {
+- tempW1 = 0x0204; /* Rtt_Nom=RZQ/8=30 Ohm */
+- }
+- }
+- }
+- }
++ else
++ tempW1 = unbuffered_dimm_nominal_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[currDimm], rank);
+ tempW=tempW|tempW1;
+ /* program MrsAddress[5,1]=output driver impedance control (DIC):
+ * based on F2x[1,0]84[DrvImpCtrl] */
+@@ -560,22 +550,10 @@ void prepareDimms(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm, BOOL wl)
+ if ((pDCTData->LogicalCPUID & AMD_DR_Bx) && (pDCTData->Status[DCT_STATUS_REGISTERED]))
+ tempW+=0x8;
+ /* determine Rtt_WR for WL & Normal mode */
+- if (pDCTData->Status[DCT_STATUS_REGISTERED]) {
++ if (pDCTData->Status[DCT_STATUS_REGISTERED])
+ tempW1 = RttWrRegDimm(pMCTData, pDCTData, currDimm, wl, MemClkFreq, rank);
+- } else {
+- if (wl)
+- {
+- tempW1 = 0x00; /* Rtt_WR=off */
+- }
+- else
+- {
+- if (MemClkFreq == 6) {
+- tempW1 = 0x200; /* Rtt_WR=RZQ/4=60 Ohm */
+- } else {
+- tempW1 = 0x400; /* Rtt_WR=RZQ/2 */
+- }
+- }
+- }
++ else
++ tempW1 = unbuffered_dimm_dynamic_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[currDimm], rank);
+ tempW=tempW|tempW1;
+ tempW = swapAddrBits_wl(pDCTData,tempW);
+ set_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, FUN_DCT,
+@@ -646,9 +624,14 @@ void programODT(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm)
+ */
+ void procConifg(sMCTStruct *pMCTData,sDCTStruct *pDCTData, u8 dimm, u8 pass)
+ {
+- u8 ByteLane, Seed_Gross, Seed_Fine;
++ u8 ByteLane, Seed_Gross, Seed_Fine, MemClkFreq;
+ u32 Value, Addr;
+ u16 Addl_Data_Offset, Addl_Data_Port;
++ u16 freq_tab[] = {400, 533, 667, 800};
++
++ /* MemClkFreq: 3: 400MHz; 4: 533MHz; 5: 667MHz; 6: 800MHz */
++ MemClkFreq = get_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId,
++ FUN_DCT, DRAM_CONFIG_HIGH, 0, 2);
+
+ /* Program F2x[1, 0]9C_x08[WrLvOdt[3:0]] to the proper ODT settings for the
+ * current memory subsystem configuration.
+@@ -656,12 +639,13 @@ void procConifg(sMCTStruct *pMCTData,sDCTStruct *pDCTData, u8 dimm, u8 pass)
+ programODT(pMCTData, pDCTData, dimm);
+
+ /* Program F2x[1,0]9C_x08[WrLvOdtEn]=1 */
+- if (pDCTData->LogicalCPUID & (AMD_DR_Cx | AMD_DR_Dx))
++ if (pDCTData->LogicalCPUID & (AMD_DR_Cx | AMD_DR_Dx)) {
+ set_DCT_ADDR_Bits(pDCTData, pDCTData->DctTrain, pDCTData->NodeId, FUN_DCT,
+ DRAM_ADD_DCT_PHY_CONTROL_REG, WrLvOdtEn, WrLvOdtEn, (u32)1);
++ }
+ else
+ {
+- /* Program WrLvOdtEn=1 through set bit 12 of D3CSODT reg offset 0 for Rev.B*/
++ /* Program WrLvOdtEn=1 through set bit 12 of D3CSODT reg offset 0 for Rev.B */
+ if (pDCTData->DctTrain)
+ {
+ Addl_Data_Offset=0x198;
+@@ -687,7 +671,6 @@ void procConifg(sMCTStruct *pMCTData,sDCTStruct *pDCTData, u8 dimm, u8 pass)
+
+ /* Wait 10 MEMCLKs to allow for ODT signal settling. */
+ pMCTData->AgesaDelay(10);
+- ByteLane = 0;
+ if (pass == 1)
+ {
+ if (pDCTData->Status[DCT_STATUS_REGISTERED])
+@@ -705,10 +688,17 @@ void procConifg(sMCTStruct *pMCTData,sDCTStruct *pDCTData, u8 dimm, u8 pass)
+ }
+ else
+ {
+- Seed_Gross = 0x00;
+- Seed_Fine = 0x1A;
++ if (MemClkFreq == 6) {
++ /* DDR-800 */
++ Seed_Gross = 0x00;
++ Seed_Fine = 0x1a;
++ } else {
++ /* Use settings for DDR-400 (interpolated from BKDG) */
++ Seed_Gross = 0x00;
++ Seed_Fine = 0x0d;
++ }
+ }
+- while(ByteLane < MAX_BYTE_LANES)
++ for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++)
+ {
+ /* Program an initialization value to registers F2x[1, 0]9C_x[51:50] and
+ * F2x[1, 0]9C_x52 to set the gross and fine delay for all the byte lane fields
+@@ -720,35 +710,32 @@ void procConifg(sMCTStruct *pMCTData,sDCTStruct *pDCTData, u8 dimm, u8 pass)
+ */
+ pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Gross;
+ pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Fine;
+- ByteLane++;
+ }
+- } else if (pDCTData->Status[DCT_STATUS_REGISTERED]) { /* For Pass 2 */
++ } else { /* Pass 2 */
+ /* From BKDG, Write Leveling Seed Value. */
+- /* TODO: The unbuffered DIMMs are unstable on the code below. So temporarily it is
+- * only for registered DIMMs. */
+ u32 RegisterDelay, SeedTotal;
+- u8 MemClkFreq;
+- u16 freq_tab[] = {400, 533, 667, 800};
+- while(ByteLane < MAX_BYTE_LANES)
++ for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++)
+ {
+- MemClkFreq = get_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId,
+- FUN_DCT, DRAM_CONFIG_HIGH, 0, 2);
+ if (pDCTData->Status[DCT_STATUS_REGISTERED])
+ RegisterDelay = 0x20; /* TODO: ((RCW2 & BIT0) == 0) ? 0x20 : 0x30; */
+ else
+ RegisterDelay = 0;
+- SeedTotal = (pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] & 0x1F) |
+- pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] << 5;
++ SeedTotal = (pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] & 0x1f) |
++ (pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] << 5);
+ /* SeedTotalPreScaling = (the total delay value in F2x[1, 0]9C_x[4A:30] from pass 1 of write levelization
+ training) - RegisterDelay. */
+- /* MemClkFreq: 3: 400MHz; 4: 533MHz; 5: 667MHz; 6: 800MHz */
+- SeedTotal = (u16) (RegisterDelay + ((((u32) SeedTotal - RegisterDelay) *
+- freq_tab[MemClkFreq-3]) / 400));
+- Seed_Gross = (SeedTotal & 0x20) != 0 ? 1 : 2;
+- Seed_Fine = SeedTotal & 0x1F;
++ SeedTotal = (uint16_t) (RegisterDelay + ((((uint64_t) SeedTotal - RegisterDelay) *
++ freq_tab[MemClkFreq-3] * 100) / (freq_tab[0] * 100)));
++ Seed_Gross = SeedTotal / 32;
++ Seed_Fine = SeedTotal & 0x1f;
++ if (Seed_Gross == 0)
++ Seed_Gross = 0;
++ else if (Seed_Gross & 0x1)
++ Seed_Gross = 1;
++ else
++ Seed_Gross = 2;
+ pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Gross;
+ pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Fine;
+- ByteLane ++;
+ }
+ }
+
+diff --git a/src/northbridge/amd/amdmct/wrappers/mcti_d.c b/src/northbridge/amd/amdmct/wrappers/mcti_d.c
+index ea32893..47260f2 100644
+--- a/src/northbridge/amd/amdmct/wrappers/mcti_d.c
++++ b/src/northbridge/amd/amdmct/wrappers/mcti_d.c
+@@ -49,7 +49,7 @@ static const uint16_t ddr3_limits[4] = {800, 666, 533, 400};
+ static u16 mctGet_NVbits(u8 index)
+ {
+ u16 val = 0;
+- int nvram;
++ int nvram = 0;
+
+ switch (index) {
+ case NV_PACK_TYPE:
+@@ -59,6 +59,10 @@ static u16 mctGet_NVbits(u8 index)
+ val = 1;
+ #elif CONFIG_CPU_SOCKET_TYPE == 0x13 /* ASB2 */
+ val = 4;
++#elif CONFIG_CPU_SOCKET_TYPE == 0x14 /* C32 */
++ val = 5;
++#elif CONFIG_CPU_SOCKET_TYPE == 0x15 /* G34 */
++ val = 3;
+ //#elif SYSTEM_TYPE == MOBILE
+ // val = 2;
+ #endif
+@@ -297,6 +301,8 @@ static void mctGet_MaxLoadFreq(struct DCTStatStruc *pDCTstat)
+ /* Determine the number of installed DIMMs */
+ int ch1_count = 0;
+ int ch2_count = 0;
++ uint8_t ch1_registered = 0;
++ uint8_t ch2_registered = 0;
+ int i;
+ for (i = 0; i < 15; i = i + 2) {
+ if (pDCTstat->DIMMValid & (1 << i))
+@@ -304,13 +310,19 @@ static void mctGet_MaxLoadFreq(struct DCTStatStruc *pDCTstat)
+ if (pDCTstat->DIMMValid & (1 << (i + 1)))
+ ch2_count++;
+ }
++ for (i = 0; i < MAX_DIMMS_SUPPORTED; i = i + 2) {
++ if (pDCTstat->DimmRegistered[i])
++ ch1_registered = 1;
++ if (pDCTstat->DimmRegistered[i + 1])
++ ch2_registered = 1;
++ }
+ if (IS_ENABLED(CONFIG_DEBUG_RAM_SETUP)) {
+ printk(BIOS_DEBUG, "mctGet_MaxLoadFreq: Channel 1: %d DIMM(s) detected\n", ch1_count);
+ printk(BIOS_DEBUG, "mctGet_MaxLoadFreq: Channel 2: %d DIMM(s) detected\n", ch2_count);
+ }
+
+ /* Set limits if needed */
+- pDCTstat->PresetmaxFreq = mct_MaxLoadFreq(max(ch1_count, ch2_count), pDCTstat->PresetmaxFreq);
++ pDCTstat->PresetmaxFreq = mct_MaxLoadFreq(max(ch1_count, ch2_count), (ch1_registered || ch2_registered), pDCTstat->PresetmaxFreq);
+ }
+
+ #ifdef UNUSED_CODE
+@@ -413,101 +425,6 @@ static void mctHookAfterDramInit(void)
+ }
+
+ #if (CONFIG_DIMM_SUPPORT & 0x000F)==0x0005 /* AMD_FAM10_DDR3 */
+-static void coreDelay(u32 microseconds)
+-{
+- msr_t now;
+- msr_t end;
+- u32 cycles;
+-
+- /* delay ~40us
+- This seems like a hack to me...
+- It would be nice to have a central delay function. */
+-
+- cycles = (microseconds * 100) << 3; /* x8 (number of 1.25ns ticks) */
+-
+- if (!(rdmsr(HWCR).lo & TSC_FREQ_SEL_MASK)) {
+- msr_t pstate_msr = rdmsr(CUR_PSTATE_MSR);
+- if (!(rdmsr(0xC0010064+pstate_msr.lo).lo & NB_DID_M_ON)) {
+- cycles = cycles <<1; // half freq, double cycles
+- }
+- } // else should we keep p0 freq at the time of setting TSC_FREQ_SEL_MASK somewhere and check it here ?
+-
+- now = rdmsr(TSC_MSR);
+- // avoid overflow when called near 2^32 ticks ~ 5.3 s boundaries
+- if (0xffffffff - cycles >= now.lo ) {
+- end.hi = now.hi;
+- end.lo = now.lo + cycles;
+- } else {
+- end.hi = now.hi +1; //
+- end.lo = cycles - (1+(0xffffffff - now.lo));
+- }
+- do {
+- now = rdmsr(TSC_MSR);
+- } while ((now.hi < end.hi) || ((now.hi == end.hi) && (now.lo < end.lo)));
+-}
+-
+-/* Erratum 350 */
+-static void vErrata350(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat)
+-{
+- u8 u8Channel;
+- u8 u8Receiver;
+- u32 u32Addr;
+- u8 u8Valid;
+- u32 u32DctDev;
+-
+- // 1. dummy read for each installed DIMM */
+- for (u8Channel = 0; u8Channel < 2; u8Channel++) {
+- // This will be 0 for vaild DIMMS, eles 8
+- u8Receiver = mct_InitReceiver_D(pDCTstat, u8Channel);
+-
+- for (; u8Receiver < 8; u8Receiver += 2) {
+- u32Addr = mct_GetRcvrSysAddr_D(pMCTstat, pDCTstat, u8Channel, u8Receiver, &u8Valid);
+-
+- if(!u8Valid) { /* Address not supported on current CS */
+- print_t("vErrata350: Address not supported on current CS\n");
+- continue;
+- }
+- print_t("vErrata350: dummy read \n");
+- read32_fs(u32Addr);
+- }
+- }
+-
+- print_t("vErrata350: step 2a\n");
+-
+- /* 2. Write 0000_8000h to register F2x[1, 0]9C_xD080F0C. */
+- u32DctDev = pDCTstat->dev_dct;
+- Set_NB32_index_wait(u32DctDev, 0x098, 0xD080F0C, 0x00008000);
+- /* ^--- value
+- ^---F2x[1, 0]9C_x0D080F0C, No description in BKDG.
+- ^----F2x[1, 0]98 DRAM Controller Additional Data Offset Register */
+-
+- if(!pDCTstat->GangedMode) {
+- print_t("vErrata350: step 2b\n");
+- Set_NB32_index_wait(u32DctDev, 0x198, 0xD080F0C, 0x00008000);
+- /* ^--- value
+- ^---F2x[1, 0]9C_x0D080F0C, No description in BKDG
+- ^----F2x[1, 0]98 DRAM Controller Additional Data Offset Register */
+- }
+-
+- print_t("vErrata350: step 3\n");
+- /* 3. Wait at least 300 nanoseconds. */
+- coreDelay(1);
+-
+- print_t("vErrata350: step 4\n");
+- /* 4. Write 0000_0000h to register F2x[1, 0]9C_xD080F0C. */
+- Set_NB32_index_wait(u32DctDev, 0x098, 0xD080F0C, 0x00000000);
+-
+- if(!pDCTstat->GangedMode) {
+- print_t("vErrata350: step 4b\n");
+- Set_NB32_index_wait(u32DctDev, 0x198, 0xD080F0C, 0x00000000);
+- }
+-
+- print_t("vErrata350: step 5\n");
+- /* 5. Wait at least 2 microseconds. */
+- coreDelay(2);
+-
+-}
+-
+ static void vErratum372(struct DCTStatStruc *pDCTstat)
+ {
+ msr_t msr = rdmsr(NB_CFG_MSR);
+@@ -546,8 +463,7 @@ static void mctHookBeforeAnyTraining(struct MCTStatStruc *pMCTstat, struct DCTSt
+ {
+ #if (CONFIG_DIMM_SUPPORT & 0x000F)==0x0005 /* AMD_FAM10_DDR3 */
+ /* FIXME : as of 25.6.2010 errata 350 and 372 should apply to ((RB|BL|DA)-C[23])|(HY-D[01])|(PH-E0) but I don't find constants for all of them */
+- if (pDCTstatA->LogicalCPUID & AMD_DRBH_Cx) {
+- vErrata350(pMCTstat, pDCTstatA);
++ if (pDCTstatA->LogicalCPUID & (AMD_DRBH_Cx | AMD_DR_Dx)) {
+ vErratum372(pDCTstatA);
+ vErratum414(pDCTstatA);
+ }
+--
+1.7.9.5
+