diff options
Diffstat (limited to 'resources/libreboot/patch/kgpe-d16/0011-northbridge-amd-amdmct-Fix-broken-AMD-K10-DDR3-memor.patch')
-rw-r--r-- | resources/libreboot/patch/kgpe-d16/0011-northbridge-amd-amdmct-Fix-broken-AMD-K10-DDR3-memor.patch | 3451 |
1 files changed, 3451 insertions, 0 deletions
diff --git a/resources/libreboot/patch/kgpe-d16/0011-northbridge-amd-amdmct-Fix-broken-AMD-K10-DDR3-memor.patch b/resources/libreboot/patch/kgpe-d16/0011-northbridge-amd-amdmct-Fix-broken-AMD-K10-DDR3-memor.patch new file mode 100644 index 00000000..ec822df4 --- /dev/null +++ b/resources/libreboot/patch/kgpe-d16/0011-northbridge-amd-amdmct-Fix-broken-AMD-K10-DDR3-memor.patch @@ -0,0 +1,3451 @@ +From 791a6ea672f16f971422f10514bb0c4225930489 Mon Sep 17 00:00:00 2001 +From: Timothy Pearson <kb9vqf@pearsoncomputing.net> +Date: Sat, 5 Sep 2015 17:55:58 -0500 +Subject: [PATCH 011/146] northbridge/amd/amdmct: Fix broken AMD K10 DDR3 + memory initialization + +--- + src/northbridge/amd/amdmct/mct/mct_d.c | 1 - + src/northbridge/amd/amdmct/mct_ddr3/mct_d.c | 177 ++++- + src/northbridge/amd/amdmct/mct_ddr3/mct_d.h | 8 +- + src/northbridge/amd/amdmct/mct_ddr3/mct_d_gcc.h | 87 +-- + src/northbridge/amd/amdmct/mct_ddr3/mctardk6.c | 6 +- + src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c | 806 ++++++++++++----------- + src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c | 6 +- + src/northbridge/amd/amdmct/mct_ddr3/mctmtr_d.c | 14 +- + src/northbridge/amd/amdmct/mct_ddr3/mctndi_d.c | 3 +- + src/northbridge/amd/amdmct/mct_ddr3/mctproc.c | 19 +- + src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c | 5 +- + src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c | 803 +++++++++++----------- + src/northbridge/amd/amdmct/mct_ddr3/mctsrc1p.c | 18 +- + src/northbridge/amd/amdmct/mct_ddr3/mctsrc2p.c | 13 +- + src/northbridge/amd/amdmct/mct_ddr3/mcttmrl.c | 7 +- + src/northbridge/amd/amdmct/mct_ddr3/mctwl.c | 42 +- + src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c | 267 ++++---- + src/northbridge/amd/amdmct/wrappers/mcti_d.c | 114 +--- + 18 files changed, 1254 insertions(+), 1142 deletions(-) + +diff --git a/src/northbridge/amd/amdmct/mct/mct_d.c b/src/northbridge/amd/amdmct/mct/mct_d.c +index 3dec934..88910e2 100644 +--- a/src/northbridge/amd/amdmct/mct/mct_d.c ++++ b/src/northbridge/amd/amdmct/mct/mct_d.c +@@ 
-542,7 +542,6 @@ static void HTMemMapInit_D(struct MCTStatStruc *pMCTstat, + pDCTstat = pDCTstatA + Node; + devx = pDCTstat->dev_map; + DramSelBaseAddr = 0; +- pDCTstat = pDCTstatA + Node; + if (!pDCTstat->GangedMode) { + DramSelBaseAddr = pDCTstat->NodeSysLimit - pDCTstat->DCTSysLimit; + /*In unganged mode, we must add DCT0 and DCT1 to DCTSysLimit */ +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c +index 71a6be8..fa59d71 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c +@@ -214,6 +214,8 @@ static const u8 Table_DQSRcvEn_Offset[] = {0x00,0x01,0x10,0x11,0x2}; + static const u8 Tab_L1CLKDis[] = {0x20, 0x20, 0x10, 0x10, 0x08, 0x08, 0x04, 0x04}; + static const u8 Tab_AM3CLKDis[] = {0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00}; + static const u8 Tab_S1CLKDis[] = {0xA2, 0xA2, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; ++static const u8 Tab_C32CLKDis[] = {0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00}; /* Enable CS0 - CS3 clocks (DIMM0 - DIMM1) */ ++static const u8 Tab_G34CLKDis[] = {0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00}; /* Enable CS0 - CS3 clocks (DIMM0 - DIMM1) */ + static const u8 Tab_ManualCLKDis[]= {0x10, 0x04, 0x08, 0x20, 0x00, 0x00, 0x00, 0x00}; + + static const u8 Table_Comp_Rise_Slew_20x[] = {7, 3, 2, 2, 0xFF}; +@@ -277,6 +279,11 @@ restartinit: + for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) { + struct DCTStatStruc *pDCTstat; + pDCTstat = pDCTstatA + Node; ++ ++ /* Zero out data structures to avoid false detection of DIMMs */ ++ memset(pDCTstat, 0, sizeof(struct DCTStatStruc)); ++ ++ /* Initialize data structures */ + pDCTstat->Node_ID = Node; + pDCTstat->dev_host = PA_HOST(Node); + pDCTstat->dev_map = PA_MAP(Node); +@@ -284,17 +291,22 @@ restartinit: + pDCTstat->dev_nbmisc = PA_NBMISC(Node); + pDCTstat->NodeSysBase = node_sys_base; + ++ printk(BIOS_DEBUG, "mctAutoInitMCT_D: mct_init Node %d\n", Node); + mct_init(pMCTstat, pDCTstat); 
+ mctNodeIDDebugPort_D(); + pDCTstat->NodePresent = NodePresent_D(Node); + if (pDCTstat->NodePresent) { /* See if Node is there*/ ++ printk(BIOS_DEBUG, "mctAutoInitMCT_D: clear_legacy_Mode\n"); + clear_legacy_Mode(pMCTstat, pDCTstat); + pDCTstat->LogicalCPUID = mctGetLogicalCPUID_D(Node); + ++ printk(BIOS_DEBUG, "mctAutoInitMCT_D: mct_InitialMCT_D\n"); + mct_InitialMCT_D(pMCTstat, pDCTstat); + ++ printk(BIOS_DEBUG, "mctAutoInitMCT_D: mctSMBhub_Init\n"); + mctSMBhub_Init(Node); /* Switch SMBUS crossbar to proper node*/ + ++ printk(BIOS_DEBUG, "mctAutoInitMCT_D: mct_initDCT\n"); + mct_initDCT(pMCTstat, pDCTstat); + if (pDCTstat->ErrCode == SC_FatalErr) { + goto fatalexit; /* any fatal errors?*/ +@@ -345,6 +357,7 @@ restartinit: + + mct_FinalMCT_D(pMCTstat, pDCTstatA); + printk(BIOS_DEBUG, "mctAutoInitMCT_D Done: Global Status: %x\n", pMCTstat->GStatus); ++ + return; + + fatalexit: +@@ -560,7 +573,6 @@ static void HTMemMapInit_D(struct MCTStatStruc *pMCTstat, + pDCTstat = pDCTstatA + Node; + devx = pDCTstat->dev_map; + DramSelBaseAddr = 0; +- pDCTstat = pDCTstatA + Node; /* ??? */ + if (!pDCTstat->GangedMode) { + DramSelBaseAddr = pDCTstat->NodeSysLimit - pDCTstat->DCTSysLimit; + /*In unganged mode, we must add DCT0 and DCT1 to DCTSysLimit */ +@@ -645,6 +657,7 @@ static void HTMemMapInit_D(struct MCTStatStruc *pMCTstat, + devx = pDCTstat->dev_map; + + if (pDCTstat->NodePresent) { ++ printk(BIOS_DEBUG, " Copy dram map from Node 0 to Node %02x \n", Node); + reg = 0x40; /*Dram Base 0*/ + do { + val = Get_NB32(dev, reg); +@@ -1162,7 +1175,7 @@ static void SPD2ndTiming(struct MCTStatStruc *pMCTstat, + + /* Program DRAM Timing values */ + DramTimingLo = 0; /* Dram Timing Low init */ +- val = pDCTstat->CASL - 2; /* pDCTstat.CASL to reg. definition */ ++ val = pDCTstat->CASL - 4; /* pDCTstat.CASL to reg. 
definition */ + DramTimingLo |= val; + + val = pDCTstat->Trcd - Bias_TrcdT; +@@ -1406,18 +1419,16 @@ static void SPDGetTCL_D(struct MCTStatStruc *pMCTstat, + else if (tCKproposed16x <= 24) { + pDCTstat->TargetFreq = 6; + tCKproposed16x = 24; +- } +- else if (tCKproposed16x <= 30) { ++ } else if (tCKproposed16x <= 30) { + pDCTstat->TargetFreq = 5; + tCKproposed16x = 30; +- } +- else { ++ } else { + pDCTstat->TargetFreq = 4; + tCKproposed16x = 40; + } + /* Running through this loop twice: + - First time find tCL at target frequency +- - Second tim find tCL at 400MHz */ ++ - Second time find tCL at 400MHz */ + + for (;;) { + CLT_Fail = 0; +@@ -1451,7 +1462,7 @@ static void SPDGetTCL_D(struct MCTStatStruc *pMCTstat, + CLT_Fail = 1; + /* get CL and T */ + if (!CLT_Fail) { +- bytex = CLactual - 2; ++ bytex = CLactual; + if (tCKproposed16x == 20) + byte = 7; + else if (tCKproposed16x == 24) +@@ -1632,7 +1643,7 @@ static u8 AutoConfig_D(struct MCTStatStruc *pMCTstat, + val = 0x0f; /* recommended setting (default) */ + DramConfigHi |= val << 24; + +- if (pDCTstat->LogicalCPUID & (AMD_DR_Cx | AMD_DR_Bx)) ++ if (pDCTstat->LogicalCPUID & (AMD_DR_Dx | AMD_DR_Cx | AMD_DR_Bx)) + DramConfigHi |= 1 << DcqArbBypassEn; + + /* Build MemClkDis Value from Dram Timing Lo and +@@ -1657,6 +1668,10 @@ static u8 AutoConfig_D(struct MCTStatStruc *pMCTstat, + p = Tab_L1CLKDis; + else if (byte == PT_M2 || byte == PT_AS) + p = Tab_AM3CLKDis; ++ else if (byte == PT_C3) ++ p = Tab_C32CLKDis; ++ else if (byte == PT_GR) ++ p = Tab_G34CLKDis; + else + p = Tab_S1CLKDis; + +@@ -2102,8 +2117,7 @@ static u8 DIMMPresence_D(struct MCTStatStruc *pMCTstat, + if (byte == JED_RDIMM || byte == JED_MiniRDIMM) { + RegDIMMPresent |= 1 << i; + pDCTstat->DimmRegistered[i] = 1; +- } +- else { ++ } else { + pDCTstat->DimmRegistered[i] = 0; + } + /* Check ECC capable */ +@@ -2977,9 +2991,9 @@ static void mct_FinalMCT_D(struct MCTStatStruc *pMCTstat, + } else { /* For Dx CPU */ + val = 0x0CE00F00 | 1 << 29/* 
FlushWrOnStpGnt */; + if (!(pDCTstat->GangedMode)) +- val |= 0x20; /* MctWrLimit = 8 for Unganed mode */ ++ val |= 0x20; /* MctWrLimit = 8 for Unganged mode */ + else +- val |= 0x40; /* MctWrLimit = 16 for ganed mode */ ++ val |= 0x40; /* MctWrLimit = 16 for ganged mode */ + Set_NB32(pDCTstat->dev_dct, 0x11C, val); + + val = Get_NB32(pDCTstat->dev_dct, 0x1B0); +@@ -3414,6 +3428,138 @@ static void mct_BeforeDramInit_Prod_D(struct MCTStatStruc *pMCTstat, + Set_NB32(dev, 0x98 + reg_off, 0x0D000030); + Set_NB32(dev, 0x9C + reg_off, dword); + Set_NB32(dev, 0x98 + reg_off, 0x4D040F30); ++ ++ /* FIXME ++ * Mainboards need to be able to specify the maximum number of DIMMs installable per channel ++ * For now assume a maximum of 2 DIMMs per channel can be installed ++ */ ++ uint8_t MaxDimmsInstallable = 2; ++ ++ /* Obtain number of DIMMs on channel */ ++ uint8_t dimm_count = pDCTstat->MAdimms[i]; ++ uint8_t rank_count_dimm0; ++ uint8_t rank_count_dimm1; ++ uint32_t odt_pattern_0; ++ uint32_t odt_pattern_1; ++ uint32_t odt_pattern_2; ++ uint32_t odt_pattern_3; ++ ++ /* Select appropriate ODT pattern for installed DIMMs ++ * Refer to the BKDG Rev. 
3.62, page 120 onwards ++ */ ++ if (pDCTstat->C_DCTPtr[i]->Status[DCT_STATUS_REGISTERED]) { ++ if (MaxDimmsInstallable == 2) { ++ if (dimm_count == 1) { ++ /* 1 DIMM detected */ ++ rank_count_dimm1 = pDCTstat->C_DCTPtr[i]->DimmRanks[1]; ++ if (rank_count_dimm1 == 1) { ++ odt_pattern_0 = 0x00000000; ++ odt_pattern_1 = 0x00000000; ++ odt_pattern_2 = 0x00000000; ++ odt_pattern_3 = 0x00020000; ++ } else if (rank_count_dimm1 == 2) { ++ odt_pattern_0 = 0x00000000; ++ odt_pattern_1 = 0x00000000; ++ odt_pattern_2 = 0x00000000; ++ odt_pattern_3 = 0x02080000; ++ } else if (rank_count_dimm1 == 4) { ++ odt_pattern_0 = 0x00000000; ++ odt_pattern_1 = 0x00000000; ++ odt_pattern_2 = 0x020a0000; ++ odt_pattern_3 = 0x080a0000; ++ } else { ++ /* Fallback */ ++ odt_pattern_0 = 0x00000000; ++ odt_pattern_1 = 0x00000000; ++ odt_pattern_2 = 0x00000000; ++ odt_pattern_3 = 0x00000000; ++ } ++ } else { ++ /* 2 DIMMs detected */ ++ rank_count_dimm0 = pDCTstat->C_DCTPtr[i]->DimmRanks[0]; ++ rank_count_dimm1 = pDCTstat->C_DCTPtr[i]->DimmRanks[1]; ++ if ((rank_count_dimm0 < 4) && (rank_count_dimm1 < 4)) { ++ odt_pattern_0 = 0x00000000; ++ odt_pattern_1 = 0x01010202; ++ odt_pattern_2 = 0x00000000; ++ odt_pattern_3 = 0x09030603; ++ } else if ((rank_count_dimm0 < 4) && (rank_count_dimm1 == 4)) { ++ odt_pattern_0 = 0x01010000; ++ odt_pattern_1 = 0x01010a0a; ++ odt_pattern_2 = 0x01090000; ++ odt_pattern_3 = 0x01030e0b; ++ } else if ((rank_count_dimm0 == 4) && (rank_count_dimm1 < 4)) { ++ odt_pattern_0 = 0x00000202; ++ odt_pattern_1 = 0x05050202; ++ odt_pattern_2 = 0x00000206; ++ odt_pattern_3 = 0x0d070203; ++ } else if ((rank_count_dimm0 == 4) && (rank_count_dimm1 == 4)) { ++ odt_pattern_0 = 0x05050a0a; ++ odt_pattern_1 = 0x05050a0a; ++ odt_pattern_2 = 0x050d0a0e; ++ odt_pattern_3 = 0x05070a0b; ++ } else { ++ /* Fallback */ ++ odt_pattern_0 = 0x00000000; ++ odt_pattern_1 = 0x00000000; ++ odt_pattern_2 = 0x00000000; ++ odt_pattern_3 = 0x00000000; ++ } ++ } ++ } else { ++ /* FIXME ++ * 3 DIMMs per 
channel UNIMPLEMENTED ++ */ ++ odt_pattern_0 = 0x00000000; ++ odt_pattern_1 = 0x00000000; ++ odt_pattern_2 = 0x00000000; ++ odt_pattern_3 = 0x00000000; ++ } ++ } else { ++ if (MaxDimmsInstallable == 2) { ++ if (dimm_count == 1) { ++ /* 1 DIMM detected */ ++ rank_count_dimm1 = pDCTstat->C_DCTPtr[i]->DimmRanks[1]; ++ if (rank_count_dimm1 == 1) { ++ odt_pattern_0 = 0x00000000; ++ odt_pattern_1 = 0x00000000; ++ odt_pattern_2 = 0x00000000; ++ odt_pattern_3 = 0x00020000; ++ } else if (rank_count_dimm1 == 2) { ++ odt_pattern_0 = 0x00000000; ++ odt_pattern_1 = 0x00000000; ++ odt_pattern_2 = 0x00000000; ++ odt_pattern_3 = 0x02080000; ++ } else { ++ /* Fallback */ ++ odt_pattern_0 = 0x00000000; ++ odt_pattern_1 = 0x00000000; ++ odt_pattern_2 = 0x00000000; ++ odt_pattern_3 = 0x00000000; ++ } ++ } else { ++ /* 2 DIMMs detected */ ++ odt_pattern_0 = 0x00000000; ++ odt_pattern_1 = 0x01010202; ++ odt_pattern_2 = 0x00000000; ++ odt_pattern_3 = 0x09030603; ++ } ++ } else { ++ /* FIXME ++ * 3 DIMMs per channel UNIMPLEMENTED ++ */ ++ odt_pattern_0 = 0x00000000; ++ odt_pattern_1 = 0x00000000; ++ odt_pattern_2 = 0x00000000; ++ odt_pattern_3 = 0x00000000; ++ } ++ } ++ ++ /* Program ODT pattern */ ++ Set_NB32_index_wait(dev, 0xf0 + reg_off, 0x180, odt_pattern_1); ++ Set_NB32_index_wait(dev, 0xf0 + reg_off, 0x181, odt_pattern_0); ++ Set_NB32_index_wait(dev, 0xf0 + reg_off, 0x182, odt_pattern_3); ++ Set_NB32_index_wait(dev, 0xf0 + reg_off, 0x183, odt_pattern_2); + } + } + } +@@ -3657,6 +3803,7 @@ static void mct_BeforeDQSTrain_D(struct MCTStatStruc *pMCTstat, + } + } + ++/* Erratum 350 */ + static void mct_ResetDLL_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct) + { +@@ -3692,11 +3839,11 @@ static void mct_ResetDLL_D(struct MCTStatStruc *pMCTstat, + mct_Read1LTestPattern_D(pMCTstat, pDCTstat, addr); /* cache fills */ + + /* Write 0000_8000h to register F2x[1,0]9C_xD080F0C */ +- Set_NB32_index_wait(dev, 0x98 + reg_off, 0x4D080F0C, 0x00008000); ++ 
Set_NB32_index_wait(dev, 0x98 + reg_off, 0xD080F0C, 0x00008000); + mct_Wait(80); /* wait >= 300ns */ + + /* Write 0000_0000h to register F2x[1,0]9C_xD080F0C */ +- Set_NB32_index_wait(dev, 0x98 + reg_off, 0x4D080F0C, 0x00000000); ++ Set_NB32_index_wait(dev, 0x98 + reg_off, 0xD080F0C, 0x00000000); + mct_Wait(800); /* wait >= 2us */ + break; + } +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h +index e2d7aa8..219aa42 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h +@@ -499,7 +499,7 @@ struct DCTStatStruc { /* A per Node structure*/ + /* CHB DIMM0 Byte 0 - 7 TxDqs */ + /* CHB DIMM1 Byte 0 - 7 TxDqs */ + /* CHB DIMM1 Byte 0 - 7 TxDqs */ +- u8 CH_D_B_RCVRDLY[2][4][8]; /* [A/B] [DIMM0-3] [DQS] */ ++ u16 CH_D_B_RCVRDLY[2][4][8]; /* [A/B] [DIMM0-3] [DQS] */ + /* CHA DIMM 0 Receiver Enable Delay*/ + /* CHA DIMM 1 Receiver Enable Delay*/ + /* CHA DIMM 2 Receiver Enable Delay*/ +@@ -509,7 +509,7 @@ struct DCTStatStruc { /* A per Node structure*/ + /* CHB DIMM 1 Receiver Enable Delay*/ + /* CHB DIMM 2 Receiver Enable Delay*/ + /* CHB DIMM 3 Receiver Enable Delay*/ +- u8 CH_D_BC_RCVRDLY[2][4]; ++ u16 CH_D_BC_RCVRDLY[2][4]; + /* CHA DIMM 0 - 4 Check Byte Receiver Enable Delay*/ + /* CHB DIMM 0 - 4 Check Byte Receiver Enable Delay*/ + u8 DIMMValidDCT[2]; /* DIMM# in DCT0*/ +@@ -769,7 +769,7 @@ u8 mct_checkNumberOfDqsRcvEn_1Pass(u8 pass); + u32 SetupDqsPattern_1PassA(u8 Pass); + u32 SetupDqsPattern_1PassB(u8 Pass); + u8 mct_Get_Start_RcvrEnDly_1Pass(u8 Pass); +-u8 mct_Average_RcvrEnDly_Pass(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly, u8 RcvrEnDlyLimit, u8 Channel, u8 Receiver, u8 Pass); ++u16 mct_Average_RcvrEnDly_Pass(struct DCTStatStruc *pDCTstat, u16 RcvrEnDly, u16 RcvrEnDlyLimit, u8 Channel, u8 Receiver, u8 Pass); + void CPUMemTyping_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA); + void UMAMemTyping_D(struct MCTStatStruc *pMCTstat, struct 
DCTStatStruc *pDCTstatA); + u32 mctGetLogicalCPUID(u32 Node); +@@ -779,7 +779,7 @@ void mct_TrainDQSPos_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTs + void mctSetEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA); + void TrainMaxReadLatency_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA); + void mct_EndDQSTraining_D(struct MCTStatStruc *pMCTstat,struct DCTStatStruc *pDCTstatA); +-void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly, u8 FinalValue, u8 Channel, u8 Receiver, u32 dev, u32 index_reg, u8 Addl_Index, u8 Pass); ++void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u16 RcvrEnDly, u8 FinalValue, u8 Channel, u8 Receiver, u32 dev, u32 index_reg, u8 Addl_Index, u8 Pass); + void SetEccDQSRcvrEn_D(struct DCTStatStruc *pDCTstat, u8 Channel); + void mctGet_PS_Cfg_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u32 dct); + void InterleaveBanks_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct); +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mct_d_gcc.h b/src/northbridge/amd/amdmct/mct_ddr3/mct_d_gcc.h +index 60f98bc..c40ea1a 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mct_d_gcc.h ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mct_d_gcc.h +@@ -2,6 +2,7 @@ + * This file is part of the coreboot project. + * + * Copyright (C) 2010 Advanced Micro Devices, Inc. 
++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -103,10 +104,10 @@ static void proc_CLFLUSH(u32 addr_hi) + + __asm__ volatile ( + /* clflush fs:[eax] */ +- "outb %%al, $0xed\n\t" /* _EXECFENCE */ +- "clflush %%fs:(%0)\n\t" ++ "outb %%al, $0xed\n\t" /* _EXECFENCE */ ++ "clflush %%fs:(%0)\n\t" + "mfence\n\t" +- ::"a" (addr_hi<<8) ++ ::"a" (addr_hi<<8) + ); + } + +@@ -141,6 +142,24 @@ static u32 read32_fs(u32 addr_lo) + return value; + } + ++static uint64_t read64_fs(uint32_t addr_lo) ++{ ++ uint64_t value = 0; ++ uint32_t value_lo; ++ uint32_t value_hi; ++ ++ __asm__ volatile ( ++ "outb %%al, $0xed\n\t" /* _EXECFENCE */ ++ "mfence\n\t" ++ "movl %%fs:(%2), %0\n\t" ++ "movl %%fs:(%3), %1\n\t" ++ :"=c"(value_lo), "=d"(value_hi): "a" (addr_lo), "b" (addr_lo + 4) : "memory" ++ ); ++ value |= value_lo; ++ value |= ((uint64_t)value_hi) << 32; ++ return value; ++} ++ + #ifdef UNUSED_CODE + static u8 read8_fs(u32 addr_lo) + { +@@ -210,68 +229,6 @@ static __attribute__((noinline)) void FlushDQSTestPattern_L18(u32 addr_lo) + ); + } + +-static void ReadL18TestPattern(u32 addr_lo) +-{ +- /* set fs and use fs prefix to access the mem */ +- __asm__ volatile ( +- "outb %%al, $0xed\n\t" /* _EXECFENCE */ +- "movl %%fs:-128(%%esi), %%eax\n\t" /* TestAddr cache line */ +- "movl %%fs:-64(%%esi), %%eax\n\t" /* +1 */ +- "movl %%fs:(%%esi), %%eax\n\t" /* +2 */ +- "movl %%fs:64(%%esi), %%eax\n\t" /* +3 */ +- +- "movl %%fs:-128(%%edi), %%eax\n\t" /* +4 */ +- "movl %%fs:-64(%%edi), %%eax\n\t" /* +5 */ +- "movl %%fs:(%%edi), %%eax\n\t" /* +6 */ +- "movl %%fs:64(%%edi), %%eax\n\t" /* +7 */ +- +- "movl %%fs:-128(%%ebx), %%eax\n\t" /* +8 */ +- "movl %%fs:-64(%%ebx), %%eax\n\t" /* +9 */ +- "movl %%fs:(%%ebx), %%eax\n\t" /* +10 */ +- "movl %%fs:64(%%ebx), %%eax\n\t" /* +11 */ +- +- "movl 
%%fs:-128(%%ecx), %%eax\n\t" /* +12 */ +- "movl %%fs:-64(%%ecx), %%eax\n\t" /* +13 */ +- "movl %%fs:(%%ecx), %%eax\n\t" /* +14 */ +- "movl %%fs:64(%%ecx), %%eax\n\t" /* +15 */ +- +- "movl %%fs:-128(%%edx), %%eax\n\t" /* +16 */ +- "movl %%fs:-64(%%edx), %%eax\n\t" /* +17 */ +- "mfence\n\t" +- +- :: "a"(0), "b" (addr_lo+128+8*64), "c" (addr_lo+128+12*64), +- "d" (addr_lo +128+16*64), "S"(addr_lo+128), +- "D"(addr_lo+128+4*64) +- ); +- +-} +- +-static void ReadL9TestPattern(u32 addr_lo) +-{ +- +- /* set fs and use fs prefix to access the mem */ +- __asm__ volatile ( +- "outb %%al, $0xed\n\t" /* _EXECFENCE */ +- +- "movl %%fs:-128(%%ecx), %%eax\n\t" /* TestAddr cache line */ +- "movl %%fs:-64(%%ecx), %%eax\n\t" /* +1 */ +- "movl %%fs:(%%ecx), %%eax\n\t" /* +2 */ +- "movl %%fs:64(%%ecx), %%eax\n\t" /* +3 */ +- +- "movl %%fs:-128(%%edx), %%eax\n\t" /* +4 */ +- "movl %%fs:-64(%%edx), %%eax\n\t" /* +5 */ +- "movl %%fs:(%%edx), %%eax\n\t" /* +6 */ +- "movl %%fs:64(%%edx), %%eax\n\t" /* +7 */ +- +- "movl %%fs:-128(%%ebx), %%eax\n\t" /* +8 */ +- "mfence\n\t" +- +- :: "a"(0), "b" (addr_lo+128+8*64), "c"(addr_lo+128), +- "d"(addr_lo+128+4*64) +- ); +- +-} +- + static void ReadMaxRdLat1CLTestPattern_D(u32 addr) + { + SetUpperFSbase(addr); +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctardk6.c b/src/northbridge/amd/amdmct/mct_ddr3/mctardk6.c +index ae1654c..99a2628 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mctardk6.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctardk6.c +@@ -2,6 +2,7 @@ + * This file is part of the coreboot project. + * + * Copyright (C) 2010 Advanced Micro Devices, Inc. ++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -17,7 +18,7 @@ + * Foundation, Inc. + */ + +-/* The socket type F (1207), Fr2, G (1207) are not tested. 
++/* The socket types Fr2 and G (1207) are not tested. + */ + + static void Get_ChannelPS_Cfg0_D(u8 MAAdimms, u8 Speed, u8 MAAload, +@@ -79,8 +80,7 @@ static void Get_ChannelPS_Cfg0_D( u8 MAAdimms, u8 Speed, u8 MAAload, + else + *AddrTmgCTL = 0x00353935; + } +- } +- else { ++ } else { + if(Speed == 4) { + *AddrTmgCTL = 0x00000000; + if (MAAdimms == 3) +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c +index 404727b..8572243 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c +@@ -2,6 +2,7 @@ + * This file is part of the coreboot project. + * + * Copyright (C) 2010 Advanced Micro Devices, Inc. ++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -22,13 +23,6 @@ static void CalcEccDQSPos_D(struct MCTStatStruc *pMCTstat, + u8 scale, u8 ChipSel); + static void GetDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 ChipSel); +-static u8 MiddleDQS_D(u8 min, u8 max); +-static void TrainReadDQS_D(struct MCTStatStruc *pMCTstat, +- struct DCTStatStruc *pDCTstat, +- u8 cs_start); +-static void TrainWriteDQS_D(struct MCTStatStruc *pMCTstat, +- struct DCTStatStruc *pDCTstat, +- u8 cs_start); + static void WriteDQSTestPattern_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, + u32 TestAddr_lo); +@@ -43,31 +37,19 @@ static void FlushDQSTestPattern_D(struct DCTStatStruc *pDCTstat, + u32 addr_lo); + static void SetTargetWTIO_D(u32 TestAddr); + static void ResetTargetWTIO_D(void); +-static void ReadDQSTestPattern_D(struct MCTStatStruc *pMCTstat, +- struct DCTStatStruc *pDCTstat, +- u32 TestAddr_lo); +-static void mctEngDQSwindow_Save_D(struct MCTStatStruc *pMCTstat, +- struct DCTStatStruc *pDCTstat, u8 ChipSel, +- u8 
RnkDlyFilterMin, u8 RnkDlyFilterMax); + void ResetDCTWrPtr_D(u32 dev, u32 index_reg, u32 index); + u8 mct_DisableDimmEccEn_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat); + static void mct_SetDQSDelayCSR_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, + u8 ChipSel); +-static void mct_SetDQSDelayAllCSR_D(struct MCTStatStruc *pMCTstat, +- struct DCTStatStruc *pDCTstat, +- u8 cs_start); + u32 mct_GetMCTSysAddr_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 Channel, + u8 receiver, u8 *valid); + static void SetupDqsPattern_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, + u32 *buffer); +- +-static void StoreWrRdDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat, +- struct DCTStatStruc *pDCTstat, u8 ChipSel, +- u8 RnkDlyFilterMin, u8 RnkDlyFilterMax); ++static void proc_IOCLFLUSH_D(u32 addr_hi); + + static void StoreDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 ChipSel); + +@@ -286,20 +268,99 @@ static void CalcEccDQSPos_D(struct MCTStatStruc *pMCTstat, + pDCTstat->DQSDelay = (u8)DQSDelay; + } + ++static void write_dqs_write_data_timing_registers(uint16_t* delay, uint32_t dev, uint8_t dimm, uint32_t index_reg) ++{ ++ uint32_t dword; ++ ++ /* Lanes 0 - 3 */ ++ dword = Get_NB32_index_wait(dev, index_reg, 0x1 | (dimm << 8)); ++ dword &= ~0x7f7f7f7f; ++ dword |= (delay[3] & 0x7f) << 24; ++ dword |= (delay[2] & 0x7f) << 16; ++ dword |= (delay[1] & 0x7f) << 8; ++ dword |= delay[0] & 0x7f; ++ Set_NB32_index_wait(dev, index_reg, 0x1 | (dimm << 8), dword); ++ ++ /* Lanes 4 - 7 */ ++ dword = Get_NB32_index_wait(dev, index_reg, 0x2 | (dimm << 8)); ++ dword &= ~0x7f7f7f7f; ++ dword |= (delay[7] & 0x7f) << 24; ++ dword |= (delay[6] & 0x7f) << 16; ++ dword |= (delay[5] & 0x7f) << 8; ++ dword |= delay[4] & 0x7f; ++ Set_NB32_index_wait(dev, index_reg, 0x2 | (dimm << 8), dword); ++ ++ /* Lane 8 (ECC) */ ++ dword = Get_NB32_index_wait(dev, index_reg, 0x3 | (dimm << 8)); ++ dword &= 
~0x0000007f; ++ dword |= delay[8] & 0x7f; ++ Set_NB32_index_wait(dev, index_reg, 0x3 | (dimm << 8), dword); ++} ++ ++static void write_dqs_read_data_timing_registers(uint16_t* delay, uint32_t dev, uint8_t dimm, uint32_t index_reg) ++{ ++ uint32_t dword; ++ ++ /* Lanes 0 - 3 */ ++ dword = Get_NB32_index_wait(dev, index_reg, 0x5 | (dimm << 8)); ++ dword &= ~0x3f3f3f3f; ++ dword |= (delay[3] & 0x3f) << 24; ++ dword |= (delay[2] & 0x3f) << 16; ++ dword |= (delay[1] & 0x3f) << 8; ++ dword |= delay[0] & 0x3f; ++ Set_NB32_index_wait(dev, index_reg, 0x5 | (dimm << 8), dword); ++ ++ /* Lanes 4 - 7 */ ++ dword = Get_NB32_index_wait(dev, index_reg, 0x6 | (dimm << 8)); ++ dword &= ~0x3f3f3f3f; ++ dword |= (delay[7] & 0x3f) << 24; ++ dword |= (delay[6] & 0x3f) << 16; ++ dword |= (delay[5] & 0x3f) << 8; ++ dword |= delay[4] & 0x3f; ++ Set_NB32_index_wait(dev, index_reg, 0x6 | (dimm << 8), dword); ++ ++ /* Lane 8 (ECC) */ ++ dword = Get_NB32_index_wait(dev, index_reg, 0x7 | (dimm << 8)); ++ dword &= ~0x0000003f; ++ dword |= delay[8] & 0x3f; ++ Set_NB32_index_wait(dev, index_reg, 0x7 | (dimm << 8), dword); ++} ++ ++/* DQS Position Training ++ * Algorithm detailed in the Fam10h BKDG Rev. 
3.62 section 2.8.9.9.3 ++ */ + static void TrainDQSRdWrPos_D(struct MCTStatStruc *pMCTstat, +- struct DCTStatStruc *pDCTstat, +- u8 cs_start) ++ struct DCTStatStruc *pDCTstat) + { + u32 Errors; +- u8 Channel, DQSWrDelay; ++ u8 Channel; ++ u8 Receiver; + u8 _DisableDramECC = 0; +- u32 PatternBuffer[292]; ++ u32 PatternBuffer[304]; /* 288 + 16 */ + u8 _Wrap32Dis = 0, _SSE2 = 0; +- u8 dqsWrDelay_end; + ++ u32 dev; + u32 addr; ++ u8 valid; + u32 cr4; + u32 lo, hi; ++ u32 index_reg; ++ uint32_t TestAddr; ++ ++ uint8_t dual_rank; ++ uint8_t iter; ++ uint8_t lane; ++ uint16_t bytelane_test_results; ++ uint16_t current_write_dqs_delay[MAX_BYTE_LANES]; ++ uint16_t current_read_dqs_delay[MAX_BYTE_LANES]; ++ uint16_t write_dqs_delay_stepping_done[MAX_BYTE_LANES]; ++ uint8_t dqs_read_results_array[2][MAX_BYTE_LANES][64]; /* [rank][lane][step] */ ++ uint8_t dqs_write_results_array[2][MAX_BYTE_LANES][128]; /* [rank][lane][step] */ ++ ++ uint8_t last_pos = 0; ++ uint8_t cur_count = 0; ++ uint8_t best_pos = 0; ++ uint8_t best_count = 0; + + print_debug_dqs("\nTrainDQSRdWrPos: Node_ID ", pDCTstat->Node_ID, 0); + cr4 = read_cr4(); +@@ -323,50 +384,363 @@ static void TrainDQSRdWrPos_D(struct MCTStatStruc *pMCTstat, + SetupDqsPattern_D(pMCTstat, pDCTstat, PatternBuffer); + + /* mct_BeforeTrainDQSRdWrPos_D */ +- dqsWrDelay_end = 0x20; ++ ++ dev = pDCTstat->dev_dct; ++ pDCTstat->Direction = DQS_READDIR; ++ ++ /* 2.8.9.9.3 (2) ++ * Loop over each channel, lane, and rank ++ */ ++ ++ /* NOTE ++ * The BKDG originally stated to iterate over lane, then rank, however this process is quite slow ++ * compared to an equivalent loop over rank, then lane as the latter allows multiple lanes to be ++ * tested simultaneously, thus improving performance by around 8x. 
++ */ + + Errors = 0; + for (Channel = 0; Channel < 2; Channel++) { +- print_debug_dqs("\tTrainDQSRdWrPos: 1 Channel ",Channel, 1); ++ print_debug_dqs("\tTrainDQSRdWrPos: 1 Channel ", Channel, 1); + pDCTstat->Channel = Channel; + + if (pDCTstat->DIMMValidDCT[Channel] == 0) /* mct_BeforeTrainDQSRdWrPos_D */ + continue; +- pDCTstat->DqsRdWrPos_Saved = 0; +- for ( DQSWrDelay = 0; DQSWrDelay < dqsWrDelay_end; DQSWrDelay++) { +- pDCTstat->DQSDelay = DQSWrDelay; +- pDCTstat->Direction = DQS_WRITEDIR; +- mct_SetDQSDelayAllCSR_D(pMCTstat, pDCTstat, cs_start); +- +- print_debug_dqs("\t\tTrainDQSRdWrPos: 21 DQSWrDelay ", DQSWrDelay, 2); +- TrainReadDQS_D(pMCTstat, pDCTstat, cs_start); +- print_debug_dqs("\t\tTrainDQSRdWrPos: 21 DqsRdWrPos_Saved ", pDCTstat->DqsRdWrPos_Saved, 2); +- if (pDCTstat->DqsRdWrPos_Saved == 0xFF) +- break; +- +- print_debug_dqs("\t\tTrainDQSRdWrPos: 22 TrainErrors ",pDCTstat->TrainErrors, 2); +- if (pDCTstat->TrainErrors == 0) { ++ ++ index_reg = 0x98 + 0x100 * Channel; ++ ++ dual_rank = 0; ++ Receiver = mct_InitReceiver_D(pDCTstat, Channel); ++ /* There are four receiver pairs, loosely associated with chipselects. ++ * This is essentially looping over each rank of each DIMM. 
++ */ ++ for (; Receiver < 8; Receiver++) { ++ if ((Receiver & 0x1) == 0) { ++ /* Even rank of DIMM */ ++ if(mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver+1)) ++ dual_rank = 1; ++ else ++ dual_rank = 0; ++ } ++ ++ if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver)) { ++ continue; ++ } ++ ++ /* Select the base test address for the current rank */ ++ TestAddr = mct_GetMCTSysAddr_D(pMCTstat, pDCTstat, Channel, Receiver, &valid); ++ if (!valid) { /* Address not supported on current CS */ ++ continue; ++ } ++ ++ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 14 TestAddr ", TestAddr, 4); ++ SetUpperFSbase(TestAddr); /* fs:eax=far ptr to target */ ++ ++ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 12 Receiver ", Receiver, 2); ++ ++ /* 2.8.9.9.3 (DRAM Write Data Timing Loop) ++ * Iterate over all possible DQS delay values (0x0 - 0x7f) ++ */ ++ uint8_t test_write_dqs_delay = 0; ++ uint8_t test_read_dqs_delay = 0; ++ uint8_t passing_dqs_delay_found[MAX_BYTE_LANES]; ++ ++ /* Initialize variables */ ++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) { ++ current_write_dqs_delay[lane] = 0; ++ passing_dqs_delay_found[lane] = 0; ++ write_dqs_delay_stepping_done[lane] = 0; ++ } ++ ++ for (test_write_dqs_delay = 0; test_write_dqs_delay < 128; test_write_dqs_delay++) { ++ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 16 test_write_dqs_delay ", test_write_dqs_delay, 6); ++ ++ /* Break out of loop if passing window already found, */ ++ if (write_dqs_delay_stepping_done[0] && write_dqs_delay_stepping_done[1] ++ && write_dqs_delay_stepping_done[2] && write_dqs_delay_stepping_done[3] ++ && write_dqs_delay_stepping_done[4] && write_dqs_delay_stepping_done[5] ++ && write_dqs_delay_stepping_done[6] && write_dqs_delay_stepping_done[7]) + break; ++ ++ /* Commit the current Write Data Timing settings to the hardware registers */ ++ write_dqs_write_data_timing_registers(current_write_dqs_delay, dev, (Receiver >> 1), index_reg); ++ ++ /* Write the DRAM training pattern to the base 
test address */ ++ WriteDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8); ++ ++ /* 2.8.9.9.3 (DRAM Read DQS Timing Control Loop) ++ * Iterate over all possible DQS delay values (0x0 - 0x3f) ++ */ ++ for (test_read_dqs_delay = 0; test_read_dqs_delay < 64; test_read_dqs_delay++) { ++ print_debug_dqs("\t\t\t\t\tTrainDQSRdWrPos: 161 test_read_dqs_delay ", test_read_dqs_delay, 6); ++ ++ /* Initialize Read DQS Timing Control settings for this iteration */ ++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) ++ if (!write_dqs_delay_stepping_done[lane]) ++ current_read_dqs_delay[lane] = test_read_dqs_delay; ++ ++ /* Commit the current Read DQS Timing Control settings to the hardware registers */ ++ write_dqs_read_data_timing_registers(current_read_dqs_delay, dev, (Receiver >> 1), index_reg); ++ ++ /* Initialize test result variable */ ++ bytelane_test_results = 0xff; ++ ++ /* Read the DRAM training pattern from the base test address three times ++ * NOTE ++ * While the BKDG states to read three times this is probably excessive! 
++ * Decrease training time by only reading the test pattern once per iteration ++ */ ++ for (iter = 0; iter < 1; iter++) { ++ /* Flush caches */ ++ SetTargetWTIO_D(TestAddr); ++ FlushDQSTestPattern_D(pDCTstat, TestAddr << 8); ++ ResetTargetWTIO_D(); ++ ++ /* Read and compare pattern */ ++ bytelane_test_results &= (CompareDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8) & 0xff); /* [Lane 7 :: Lane 0] 0=fail, 1=pass */ ++ ++ /* If all lanes have already failed testing bypass remaining re-read attempt(s) */ ++ if (bytelane_test_results == 0x0) ++ break; ++ } ++ ++ /* Store any lanes that passed testing for later use */ ++ for (lane = 0; lane < 8; lane++) ++ if (!write_dqs_delay_stepping_done[lane]) ++ dqs_read_results_array[Receiver & 0x1][lane][test_read_dqs_delay] = (!!(bytelane_test_results & (1 << lane))); ++ ++ print_debug_dqs("\t\t\t\t\tTrainDQSRdWrPos: 162 bytelane_test_results ", bytelane_test_results, 6); ++ } ++ ++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) { ++ if (write_dqs_delay_stepping_done[lane]) ++ continue; ++ ++ /* Determine location and length of longest consecutive string of passing values ++ * Output is stored in best_pos and best_count ++ */ ++ last_pos = 0; ++ cur_count = 0; ++ best_pos = 0; ++ best_count = 0; ++ for (iter = 0; iter < 64; iter++) { ++ if ((dqs_read_results_array[Receiver & 0x1][lane][iter]) && (iter < 63)) { ++ /* Pass */ ++ cur_count++; ++ } else { ++ /* Failure or end of loop */ ++ if (cur_count > best_count) { ++ best_count = cur_count; ++ best_pos = last_pos; ++ } ++ cur_count = 0; ++ last_pos = iter; ++ } ++ } ++ ++ if (best_count > 2) { ++ /* Exit the DRAM Write Data Timing Loop after programming the Read DQS Timing Control ++ * register with the center of the passing window ++ */ ++ current_read_dqs_delay[lane] = (best_pos + (best_count / 2)); ++ passing_dqs_delay_found[lane] = 1; ++ ++ /* Commit the current Read DQS Timing Control settings to the hardware registers */ ++ 
write_dqs_read_data_timing_registers(current_read_dqs_delay, dev, (Receiver >> 1), index_reg); ++ ++ /* Exit the DRAM Write Data Timing Loop */ ++ write_dqs_delay_stepping_done[lane] = 1; ++ ++ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 142 largest passing region ", best_count, 4); ++ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 143 largest passing region start ", best_pos, 4); ++ } ++ ++ /* Increment the DQS Write Delay value if needed for the next DRAM Write Data Timing Loop iteration */ ++ if (!write_dqs_delay_stepping_done[lane]) ++ current_write_dqs_delay[lane]++; ++ } + } +- Errors |= pDCTstat->TrainErrors; +- } + +- pDCTstat->DqsRdWrPos_Saved = 0; +- if (DQSWrDelay < dqsWrDelay_end) { +- Errors = 0; ++ /* Flag failure(s) if present */ ++ for (lane = 0; lane < 8; lane++) { ++ if (!passing_dqs_delay_found[lane]) { ++ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 121 Unable to find passing region for lane ", lane, 2); ++ ++ /* Flag absence of passing window */ ++ Errors |= 1 << SB_NODQSPOS; ++ } ++ } ++ ++ /* Iterate over all possible Write Data Timing values (0x0 - 0x7f) ++ * Note that the Read DQS Timing Control was calibrated / centered in the prior nested loop ++ */ ++ for (test_write_dqs_delay = 0; test_write_dqs_delay < 128; test_write_dqs_delay++) { ++ /* Initialize Write Data Timing settings for this iteration */ ++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) ++ current_write_dqs_delay[lane] = test_write_dqs_delay; ++ ++ /* Commit the current Write Data Timing settings to the hardware registers */ ++ write_dqs_write_data_timing_registers(current_write_dqs_delay, dev, (Receiver >> 1), index_reg); ++ ++ /* Write the DRAM training pattern to the base test address */ ++ WriteDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8); ++ ++ /* Flush caches */ ++ SetTargetWTIO_D(TestAddr); ++ FlushDQSTestPattern_D(pDCTstat, TestAddr << 8); ++ ResetTargetWTIO_D(); ++ ++ /* Read and compare pattern from the base test address */ ++ bytelane_test_results = 
(CompareDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8) & 0xff); /* [Lane 7 :: Lane 0] 0=fail, 1=pass */ ++ ++ /* Store any lanes that passed testing for later use */ ++ for (lane = 0; lane < 8; lane++) ++ dqs_write_results_array[Receiver & 0x1][lane][test_write_dqs_delay] = (!!(bytelane_test_results & (1 << lane))); ++ } ++ ++ for (lane = 0; lane < 8; lane++) { ++ if ((!dual_rank) || (dual_rank && (Receiver & 0x1))) { ++ ++#ifdef PRINT_PASS_FAIL_BITMAPS ++ for (iter = 0; iter < 64; iter++) { ++ if (dqs_read_results_array[0][lane][iter]) ++ printk(BIOS_DEBUG, "+"); ++ else ++ printk(BIOS_DEBUG, "."); ++ } ++ printk(BIOS_DEBUG, "\n"); ++ for (iter = 0; iter < 64; iter++) { ++ if (dqs_read_results_array[1][lane][iter]) ++ printk(BIOS_DEBUG, "+"); ++ else ++ printk(BIOS_DEBUG, "."); ++ } ++ printk(BIOS_DEBUG, "\n\n"); ++ for (iter = 0; iter < 128; iter++) { ++ if (dqs_write_results_array[0][lane][iter]) ++ printk(BIOS_DEBUG, "+"); ++ else ++ printk(BIOS_DEBUG, "."); ++ } ++ printk(BIOS_DEBUG, "\n"); ++ for (iter = 0; iter < 128; iter++) { ++ if (dqs_write_results_array[1][lane][iter]) ++ printk(BIOS_DEBUG, "+"); ++ else ++ printk(BIOS_DEBUG, "."); ++ } ++ printk(BIOS_DEBUG, "\n\n"); ++#endif ++ ++ /* Base rank of single-rank DIMM, or odd rank of dual-rank DIMM */ ++ if (dual_rank) { ++ /* Intersect the passing windows of both ranks */ ++ for (iter = 0; iter < 64; iter++) ++ if (!dqs_read_results_array[1][lane][iter]) ++ dqs_read_results_array[0][lane][iter] = 0; ++ for (iter = 0; iter < 128; iter++) ++ if (!dqs_write_results_array[1][lane][iter]) ++ dqs_write_results_array[0][lane][iter] = 0; ++ } ++ ++ /* Determine location and length of longest consecutive string of passing values for read DQS timing ++ * Output is stored in best_pos and best_count ++ */ ++ last_pos = 0; ++ cur_count = 0; ++ best_pos = 0; ++ best_count = 0; ++ for (iter = 0; iter < 64; iter++) { ++ if ((dqs_read_results_array[0][lane][iter]) && (iter < 63)) { ++ /* Pass */ ++ cur_count++; ++ } 
else { ++ /* Failure or end of loop */ ++ if (cur_count > best_count) { ++ best_count = cur_count; ++ best_pos = last_pos; ++ } ++ cur_count = 0; ++ last_pos = iter; ++ } ++ } ++ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 144 largest read passing region ", best_count, 4); ++ if (best_count > 0) { ++ if (best_count < MIN_DQS_WNDW) { ++ /* Flag excessively small passing window */ ++ Errors |= 1 << SB_SMALLDQS; ++ } ++ ++ /* Find the center of the passing window */ ++ current_read_dqs_delay[lane] = (best_pos + (best_count / 2)); ++ ++ /* Commit the current Read DQS Timing Control settings to the hardware registers */ ++ write_dqs_read_data_timing_registers(current_read_dqs_delay, dev, (Receiver >> 1), index_reg); ++ ++ /* Save the final Read DQS Timing Control settings for later use */ ++ pDCTstat->CH_D_DIR_B_DQS[Channel][Receiver >> 1][DQS_READDIR][lane] = current_read_dqs_delay[lane]; ++ } else { ++ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 122 Unable to find read passing region for lane ", lane, 2); ++ ++ /* Flag absence of passing window */ ++ Errors |= 1 << SB_NODQSPOS; ++ } ++ ++ /* Determine location and length of longest consecutive string of passing values for write DQS timing ++ * Output is stored in best_pos and best_count ++ */ ++ last_pos = 0; ++ cur_count = 0; ++ best_pos = 0; ++ best_count = 0; ++ for (iter = 0; iter < 128; iter++) { ++ if ((dqs_write_results_array[0][lane][iter]) && (iter < 127)) { ++ /* Pass */ ++ cur_count++; ++ } else { ++ /* Failure or end of loop */ ++ if (cur_count > best_count) { ++ best_count = cur_count; ++ best_pos = last_pos; ++ } ++ cur_count = 0; ++ last_pos = iter; ++ } ++ } ++ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 145 largest write passing region ", best_count, 4); ++ if (best_count > 0) { ++ if (best_count < MIN_DQS_WNDW) { ++ /* Flag excessively small passing window */ ++ Errors |= 1 << SB_SMALLDQS; ++ } ++ ++ /* Find the center of the passing window */ ++ current_write_dqs_delay[lane] = (best_pos + (best_count / 
2)); ++ ++ /* Commit the current Write Data Timing settings to the hardware registers */ ++ write_dqs_write_data_timing_registers(current_write_dqs_delay, dev, (Receiver >> 1), index_reg); ++ ++ /* Save the final Write Data Timing settings for later use */ ++ pDCTstat->CH_D_DIR_B_DQS[Channel][Receiver >> 1][DQS_WRITEDIR][lane] = current_write_dqs_delay[lane]; ++ } else { ++ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 123 Unable to find write passing region for lane ", lane, 2); ++ ++ /* Flag absence of passing window */ ++ Errors |= 1 << SB_NODQSPOS; ++ } ++ } ++ } + +- print_debug_dqs("\tTrainDQSRdWrPos: 231 DQSWrDelay ", DQSWrDelay, 1); +- TrainWriteDQS_D(pMCTstat, pDCTstat, cs_start); + } +- print_debug_dqs("\tTrainDQSRdWrPos: 232 Errors ", Errors, 1); +- pDCTstat->ErrStatus |= Errors; + } + ++ pDCTstat->TrainErrors |= Errors; ++ pDCTstat->ErrStatus |= Errors; ++ + #if DQS_TRAIN_DEBUG > 0 + { + u8 val; + u8 i; +- u8 Channel, Receiver, Dir; ++ u8 ChannelDTD, ReceiverDTD, Dir; + u8 *p; + + for (Dir = 0; Dir < 2; Dir++) { +@@ -375,14 +749,14 @@ static void TrainDQSRdWrPos_D(struct MCTStatStruc *pMCTstat, + } else { + printk(BIOS_DEBUG, "TrainDQSRdWrPos: CH_D_DIR_B_DQS RD:\n"); + } +- for (Channel = 0; Channel < 2; Channel++) { +- printk(BIOS_DEBUG, "Channel: %02x\n", Channel); +- for (Receiver = cs_start; Receiver < (cs_start + 2); Receiver += 2) { +- printk(BIOS_DEBUG, "\t\tReceiver: %02x: ", Receiver); +- p = pDCTstat->CH_D_DIR_B_DQS[Channel][Receiver >> 1][Dir]; ++ for (ChannelDTD = 0; ChannelDTD < 2; ChannelDTD++) { ++ printk(BIOS_DEBUG, "Channel: %02x\n", ChannelDTD); ++ for (ReceiverDTD = 0; ReceiverDTD < MAX_CS_SUPPORTED; ReceiverDTD += 2) { ++ printk(BIOS_DEBUG, "\t\tReceiver: %02x:", ReceiverDTD); ++ p = pDCTstat->CH_D_DIR_B_DQS[ChannelDTD][ReceiverDTD >> 1][Dir]; + for (i=0;i<8; i++) { + val = p[i]; +- printk(BIOS_DEBUG, "%02x ", val); ++ printk(BIOS_DEBUG, " %02x", val); + } + printk(BIOS_DEBUG, "\n"); + } +@@ -437,225 +811,6 @@ static void 
SetupDqsPattern_D(struct MCTStatStruc *pMCTstat, + pDCTstat->PtrPatternBufA = (u32)buf; + } + +-static void TrainDQSPos_D(struct MCTStatStruc *pMCTstat, +- struct DCTStatStruc *pDCTstat, +- u8 cs_start) +-{ +- u32 Errors; +- u8 ChipSel, DQSDelay; +- u8 RnkDlySeqPassMin=0, RnkDlySeqPassMax=0xFF, RnkDlyFilterMin=0, RnkDlyFilterMax=0xFF; +- u8 RnkDlySeqPassMinTot=0, RnkDlySeqPassMaxTot=0xFF, RnkDlyFilterMinTot=0, RnkDlyFilterMaxTot=0xFF; +- u8 LastTest ,LastTestTot; +- u32 TestAddr; +- u8 ByteLane; +- u8 MutualCSPassW[128]; +- u8 BanksPresent; +- u8 dqsDelay_end; +- u8 tmp, valid, tmp1; +- u16 word; +- +- /* MutualCSPassW: each byte represents a bitmap of pass/fail per +- * ByteLane. The indext within MutualCSPassW is the delay value +- * given the results. +- */ +- print_debug_dqs("\t\t\tTrainDQSPos begin ", 0, 3); +- +- Errors = 0; +- BanksPresent = 0; +- +- dqsDelay_end = 32; +- /* Bitmapped status per delay setting, 0xff=All positions +- * passing (1= PASS). Set the entire array. +- */ +- for (DQSDelay=0; DQSDelay<128; DQSDelay++) { +- MutualCSPassW[DQSDelay] = 0xFF; +- } +- +- for (ChipSel = cs_start; ChipSel < (cs_start + 2); ChipSel++) { /* logical register chipselects 0..7 */ +- print_debug_dqs("\t\t\t\tTrainDQSPos: 11 ChipSel ", ChipSel, 4); +- +- if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, pDCTstat->Channel, ChipSel)) { +- print_debug_dqs("\t\t\t\tmct_RcvrRankEnabled_D CS not enabled ", ChipSel, 4); +- continue; +- } +- +- BanksPresent = 1; /* flag for at least one bank is present */ +- TestAddr = mct_GetMCTSysAddr_D(pMCTstat, pDCTstat, pDCTstat->Channel, ChipSel, &valid); +- if (!valid) { +- print_debug_dqs("\t\t\t\tAddress not supported on current CS ", TestAddr, 4); +- continue; +- } +- +- print_debug_dqs("\t\t\t\tTrainDQSPos: 12 TestAddr ", TestAddr, 4); +- SetUpperFSbase(TestAddr); /* fs:eax=far ptr to target */ +- +- if (pDCTstat->Direction == DQS_READDIR) { +- print_debug_dqs("\t\t\t\tTrainDQSPos: 13 for read ", 0, 4); +- 
WriteDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8); +- } +- +- for (DQSDelay = 0; DQSDelay < dqsDelay_end; DQSDelay++) { +- print_debug_dqs("\t\t\t\t\tTrainDQSPos: 141 DQSDelay ", DQSDelay, 5); +- +- tmp = 0xFF; +- tmp1 = DQSDelay; +- if (pDCTstat->Direction == DQS_READDIR) { +- tmp &= MutualCSPassW[DQSDelay]; +- tmp1 += dqsDelay_end; +- } +- tmp &= MutualCSPassW[tmp1]; +- +- if (tmp == 0) { +- continue;/* skip current delay value if other chipselects have failed all 8 bytelanes */ +- } +- +- pDCTstat->DQSDelay = DQSDelay; +- mct_SetDQSDelayAllCSR_D(pMCTstat, pDCTstat, cs_start); +- print_debug_dqs("\t\t\t\t\tTrainDQSPos: 142 MutualCSPassW ", MutualCSPassW[DQSDelay], 5); +- +- if (pDCTstat->Direction == DQS_WRITEDIR) { +- print_debug_dqs("\t\t\t\t\tTrainDQSPos: 143 for write", 0, 5); +- WriteDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8); +- } +- +- print_debug_dqs("\t\t\t\t\tTrainDQSPos: 144 Pattern ", pDCTstat->Pattern, 5); +- ReadDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8); +- /* print_debug_dqs("\t\t\t\t\tTrainDQSPos: 145 MutualCSPassW ", MutualCSPassW[DQSDelay], 5); */ +- word = CompareDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8); /* 0=fail, 1=pass */ +- print_debug_dqs("\t\t\t\t\tTrainDQSPos: 144 compare 1 ", word, 3); +- +- print_debug_dqs("\t\t\t\t\tTrainDQSPos: 144 DqsRdWrPos_Saved ", pDCTstat->DqsRdWrPos_Saved, 3); +- word &= ~(pDCTstat->DqsRdWrPos_Saved); /* mask out bytelanes that already passed */ +- word &= ~(pDCTstat->DqsRdWrPos_Saved << 8); +- print_debug_dqs("\t\t\t\t\tTrainDQSPos: 144 compare 2 ", word, 3); +- +- tmp = DQSDelay; +- if (pDCTstat->Direction == DQS_READDIR) { +- MutualCSPassW[tmp] &= word >> 8; +- tmp += dqsDelay_end; +- } +- MutualCSPassW[tmp] &= word & 0xFF; +- +- print_debug_dqs("\t\t\t\t\tTrainDQSPos: 146 \tMutualCSPassW ", MutualCSPassW[DQSDelay], 5); +- +- SetTargetWTIO_D(TestAddr); +- FlushDQSTestPattern_D(pDCTstat, TestAddr << 8); +- ResetTargetWTIO_D(); +- } +- +- } +- +- if (pDCTstat->Direction == 
DQS_READDIR) { +- dqsDelay_end <<= 1; +- } +- +- if (BanksPresent) { +- #if 0 /* show the bitmap */ +- for (ByteLane = 0; ByteLane < 8; ByteLane++) { /* just print ByteLane 0 */ +- for (DQSDelay = 0; DQSDelay < dqsDelay_end; DQSDelay++) { +- if (!(MutualCSPassW[DQSDelay] &(1 << ByteLane))) { +- printk(BIOS_DEBUG, "."); +- } else { +- printk(BIOS_DEBUG, "*"); +- } +- } +- printk(BIOS_DEBUG, "\n"); +- } +- #endif +- for (ByteLane = 0; ByteLane < 8; ByteLane++) { +- print_debug_dqs("\t\t\t\tTrainDQSPos: 31 ByteLane ",ByteLane, 4); +- if (!(pDCTstat->DqsRdWrPos_Saved &(1 << ByteLane))) { +- pDCTstat->ByteLane = ByteLane; +- LastTest = DQS_FAIL; /* Analyze the results */ +- LastTestTot = DQS_FAIL; +- /* RnkDlySeqPassMin = 0; */ +- /* RnkDlySeqPassMax = 0; */ +- RnkDlyFilterMax = 0; +- RnkDlyFilterMin = 0; +- RnkDlyFilterMaxTot = 0; +- RnkDlyFilterMinTot = 0; +- for (DQSDelay = 0; DQSDelay < dqsDelay_end; DQSDelay++) { +- if (MutualCSPassW[DQSDelay] & (1 << ByteLane)) { +- print_debug_dqs("\t\t\t\t\tTrainDQSPos: 321 DQSDelay ", DQSDelay, 5); +- print_debug_dqs("\t\t\t\t\tTrainDQSPos: 322 MutualCSPassW ", MutualCSPassW[DQSDelay], 5); +- if (pDCTstat->Direction == DQS_READDIR) +- tmp = 0x20; +- else +- tmp = 0; +- if (DQSDelay >= tmp) { +- RnkDlySeqPassMax = DQSDelay; +- if (LastTest == DQS_FAIL) { +- RnkDlySeqPassMin = DQSDelay; /* start sequential run */ +- } +- if ((RnkDlySeqPassMax - RnkDlySeqPassMin)>(RnkDlyFilterMax-RnkDlyFilterMin)){ +- RnkDlyFilterMin = RnkDlySeqPassMin; +- RnkDlyFilterMax = RnkDlySeqPassMax; +- } +- LastTest = DQS_PASS; +- } +- +- if (pDCTstat->Direction == DQS_READDIR) { +- RnkDlySeqPassMaxTot = DQSDelay; +- if (LastTestTot == DQS_FAIL) +- RnkDlySeqPassMinTot = DQSDelay; +- if ((RnkDlySeqPassMaxTot - RnkDlySeqPassMinTot)>(RnkDlyFilterMaxTot-RnkDlyFilterMinTot)){ +- RnkDlyFilterMinTot = RnkDlySeqPassMinTot; +- RnkDlyFilterMaxTot = RnkDlySeqPassMaxTot; +- } +- LastTestTot = DQS_PASS; +- } +- } else { +- LastTest = DQS_FAIL; +- LastTestTot = 
DQS_FAIL; +- } +- } +- print_debug_dqs("\t\t\t\tTrainDQSPos: 33 RnkDlySeqPassMax ", RnkDlySeqPassMax, 4); +- if (RnkDlySeqPassMax == 0) { +- Errors |= 1 << SB_NODQSPOS; /* no passing window */ +- } else { +- print_debug_dqs_pair("\t\t\t\tTrainDQSPos: 34 RnkDlyFilter: ", RnkDlyFilterMin, " ", RnkDlyFilterMax, 4); +- if (((RnkDlyFilterMax - RnkDlyFilterMin) < MIN_DQS_WNDW)){ +- Errors |= 1 << SB_SMALLDQS; +- } else { +- u8 middle_dqs; +- /* mctEngDQSwindow_Save_D Not required for arrays */ +- if (pDCTstat->Direction == DQS_READDIR) +- middle_dqs = MiddleDQS_D(RnkDlyFilterMinTot, RnkDlyFilterMaxTot); +- else +- middle_dqs = MiddleDQS_D(RnkDlyFilterMin, RnkDlyFilterMax); +- pDCTstat->DQSDelay = middle_dqs; +- mct_SetDQSDelayCSR_D(pMCTstat, pDCTstat, cs_start); /* load the register with the value */ +- if (pDCTstat->Direction == DQS_READDIR) +- StoreWrRdDQSDatStrucVal_D(pMCTstat, pDCTstat, cs_start, RnkDlyFilterMinTot, RnkDlyFilterMaxTot); /* store the value into the data structure */ +- else +- StoreWrRdDQSDatStrucVal_D(pMCTstat, pDCTstat, cs_start, RnkDlyFilterMin, RnkDlyFilterMax); /* store the value into the data structure */ +- print_debug_dqs("\t\t\t\tTrainDQSPos: 42 middle_dqs : ",middle_dqs, 4); +- pDCTstat->DqsRdWrPos_Saved |= 1 << ByteLane; +- } +- } +- } +- } /* if (pDCTstat->DqsRdWrPos_Saved &(1 << ByteLane)) */ +- } +-/* skipLocMiddle: */ +- pDCTstat->TrainErrors = Errors; +- +- print_debug_dqs("\t\t\tTrainDQSPos: Errors ", Errors, 3); +-} +- +-static void mctEngDQSwindow_Save_D(struct MCTStatStruc *pMCTstat, +- struct DCTStatStruc *pDCTstat, u8 ChipSel, +- u8 RnkDlyFilterMin, u8 RnkDlyFilterMax) +-{ +- pDCTstat->CH_D_DIR_MaxMin_B_Dly[pDCTstat->Channel] +- [pDCTstat->Direction] +- [0] +- [pDCTstat->ByteLane] = RnkDlyFilterMin; +- pDCTstat->CH_D_DIR_MaxMin_B_Dly[pDCTstat->Channel] +- [pDCTstat->Direction] +- [1] +- [pDCTstat->ByteLane] = RnkDlyFilterMax; +-} +- + static void StoreDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc 
*pDCTstat, u8 ChipSel) + { +@@ -679,26 +834,6 @@ static void StoreDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat, + pDCTstat->DQSDelay; + } + +-static void StoreWrRdDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat, +- struct DCTStatStruc *pDCTstat, u8 ChipSel, +- u8 RnkDlyFilterMin, u8 RnkDlyFilterMax) +-{ +- u8 dn; +- +- if (pDCTstat->Direction == DQS_WRITEDIR) { +- dn = ChipSel >> 1; +- RnkDlyFilterMin += pDCTstat->CH_D_B_TxDqs[pDCTstat->Channel][dn][pDCTstat->ByteLane]; +- RnkDlyFilterMax += pDCTstat->CH_D_B_TxDqs[pDCTstat->Channel][dn][pDCTstat->ByteLane]; +- pDCTstat->DQSDelay += pDCTstat->CH_D_B_TxDqs[pDCTstat->Channel][dn][pDCTstat->ByteLane]; +- } else { +- RnkDlyFilterMin <<= 1; +- RnkDlyFilterMax <<= 1; +- pDCTstat->DQSDelay <<= 1; +- } +- mctEngDQSwindow_Save_D(pMCTstat, pDCTstat, ChipSel, RnkDlyFilterMin, RnkDlyFilterMax); +- StoreDQSDatStrucVal_D(pMCTstat, pDCTstat, ChipSel); +-} +- + static void GetDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 ChipSel) + { +@@ -720,33 +855,6 @@ static void GetDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat, + + /* FindDQSDatDimmVal_D is not required since we use an array */ + +-static u8 MiddleDQS_D(u8 min, u8 max) +-{ +- u8 size; +- size = max-min; +- if (size % 2) +- size++; /* round up if the size isn't even. 
*/ +- return ( min + (size >> 1)); +-} +- +-static void TrainReadDQS_D(struct MCTStatStruc *pMCTstat, +- struct DCTStatStruc *pDCTstat, +- u8 cs_start) +-{ +- print_debug_dqs("\t\tTrainReadPos ", 0, 2); +- pDCTstat->Direction = DQS_READDIR; +- TrainDQSPos_D(pMCTstat, pDCTstat, cs_start); +-} +- +-static void TrainWriteDQS_D(struct MCTStatStruc *pMCTstat, +- struct DCTStatStruc *pDCTstat, +- u8 cs_start) +-{ +- pDCTstat->Direction = DQS_WRITEDIR; +- print_debug_dqs("\t\tTrainWritePos", 0, 2); +- TrainDQSPos_D(pMCTstat, pDCTstat, cs_start); +-} +- + static void proc_IOCLFLUSH_D(u32 addr_hi) + { + SetTargetWTIO_D(addr_hi); +@@ -963,30 +1071,6 @@ static void ResetTargetWTIO_D(void) + _WRMSR(0xc0010017, lo, hi); /* IORR0 Mask */ + } + +-static void ReadDQSTestPattern_D(struct MCTStatStruc *pMCTstat, +- struct DCTStatStruc *pDCTstat, +- u32 TestAddr_lo) +-{ +- /* Read a pattern of 72 bit times (per DQ), to test dram functionality. +- * The pattern is a stress pattern which exercises both ISI and +- * crosstalk. The number of cache lines to fill is dependent on DCT +- * width mode and burstlength. +- * Mode BL Lines Pattern no. 
+- * ----+---+------------------- +- * 64 4 9 0 +- * 64 8 9 0 +- * 64M 4 9 0 +- * 64M 8 9 0 +- * 128 4 18 1 +- * 128 8 N/A - +- */ +- if (pDCTstat->Pattern == 0) +- ReadL9TestPattern(TestAddr_lo); +- else +- ReadL18TestPattern(TestAddr_lo); +- _MFENCE; +-} +- + u32 SetUpperFSbase(u32 addr_hi) + { + /* Set the upper 32-bits of the Base address, 4GB aligned) for the +@@ -1009,8 +1093,6 @@ void ResetDCTWrPtr_D(u32 dev, u32 index_reg, u32 index) + Set_NB32_index_wait(dev, index_reg, index, val); + } + +-/* mctEngDQSwindow_Save_D not required with arrays */ +- + void mct_TrainDQSPos_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstatA) + { +@@ -1021,8 +1103,8 @@ void mct_TrainDQSPos_D(struct MCTStatStruc *pMCTstat, + for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) { + pDCTstat = pDCTstatA + Node; + if (pDCTstat->DCTSysLimit) { ++ TrainDQSRdWrPos_D(pMCTstat, pDCTstat); + for (ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) { +- TrainDQSRdWrPos_D(pMCTstat, pDCTstat, ChipSel); + SetEccDQSRdWrPos_D(pMCTstat, pDCTstat, ChipSel); + } + } +@@ -1137,27 +1219,6 @@ static void mct_SetDQSDelayCSR_D(struct MCTStatStruc *pMCTstat, + } + } + +-/* +- * mct_SetDQSDelayAllCSR_D: +- * Write the Delay value to all eight byte lanes. 
+- */ +-static void mct_SetDQSDelayAllCSR_D(struct MCTStatStruc *pMCTstat, +- struct DCTStatStruc *pDCTstat, +- u8 cs_start) +-{ +- u8 ByteLane; +- u8 ChipSel = cs_start; +- +- for (ChipSel = cs_start; ChipSel < (cs_start + 2); ChipSel++) { +- if ( mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, pDCTstat->Channel, ChipSel)) { +- for (ByteLane = 0; ByteLane < 8; ByteLane++) { +- pDCTstat->ByteLane = ByteLane; +- mct_SetDQSDelayCSR_D(pMCTstat, pDCTstat, ChipSel); +- } +- } +- } +-} +- + u8 mct_RcvrRankEnabled_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, + u8 Channel, u8 ChipSel) +@@ -1196,7 +1257,7 @@ u32 mct_GetMCTSysAddr_D(struct MCTStatStruc *pMCTstat, + reg = 0x40 + (receiver << 2) + reg_off; + val = Get_NB32(dev, reg); + +- val &= ~0x0F; ++ val &= ~0xe007c01f; + + /* unganged mode DCT0+DCT1, sys addr of DCT1=node + * base+DctSelBaseAddr+local ca base*/ +@@ -1277,6 +1338,7 @@ exitGetAddrWNoError: + print_debug_dqs("mct_GetMCTSysAddr_D: base_addr ", val, 2); + print_debug_dqs("mct_GetMCTSysAddr_D: valid ", *valid, 2); + print_debug_dqs("mct_GetMCTSysAddr_D: status ", pDCTstat->Status, 2); ++ print_debug_dqs("mct_GetMCTSysAddr_D: SysBase ", pDCTstat->DCTSysBase, 2); + print_debug_dqs("mct_GetMCTSysAddr_D: HoleBase ", pDCTstat->DCTHoleBase, 2); + print_debug_dqs("mct_GetMCTSysAddr_D: Cachetop ", pMCTstat->Sub4GCacheTop, 2); + +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c b/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c +index 528c782..60bc01d 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c +@@ -2,6 +2,7 @@ + * This file is part of the coreboot project. + * + * Copyright (C) 2010 Advanced Micro Devices, Inc. 
++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -25,7 +26,6 @@ static void EnableZQcalibration(struct MCTStatStruc *pMCTstat, struct DCTStatStr + static void DisableZQcalibration(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat); + static void PrepareC_MCT(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat); + static void PrepareC_DCT(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct); +-static void MultiplyDelay(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct); + static void Restore_OnDimmMirror(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat); + static void Clear_OnDimmMirror(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat); + +@@ -154,7 +154,6 @@ static void PhyWLPass2(struct MCTStatStruc *pMCTstat, + Clear_OnDimmMirror(pMCTstat, pDCTstat); + SetDllSpeedUp_D(pMCTstat, pDCTstat, dct); + DisableAutoRefresh_D(pMCTstat, pDCTstat); +- MultiplyDelay(pMCTstat, pDCTstat, dct); + for (dimm = 0; dimm < MAX_DIMMS_SUPPORTED; dimm ++) { + if (DIMMValid & (1 << (dimm << 1))) + AgesaHwWlPhase1(pDCTstat->C_MCTPtr, pDCTstat->C_DCTPtr[dct], dimm, SecondPass); +@@ -162,6 +161,9 @@ static void PhyWLPass2(struct MCTStatStruc *pMCTstat, + } + } + ++/* Write Levelization Training ++ * Algorithm detailed in the Fam10h BKDG Rev. 3.62 section 2.8.9.9.1 ++ */ + static void WriteLevelization_HW(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat) + { +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctmtr_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mctmtr_d.c +index 3d625de..596fb23 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mctmtr_d.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctmtr_d.c +@@ -2,6 +2,7 @@ + * This file is part of the coreboot project. 
+ * + * Copyright (C) 2010 Advanced Micro Devices, Inc. ++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -201,12 +202,13 @@ static void SetMTRRrange_D(u32 Base, u32 *pLimit, u32 *pMtrrAddr, u16 MtrrType) + + void UMAMemTyping_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA) + { +-/* UMA memory size may need splitting the MTRR configuration into two +- Before training use NB_BottomIO or the physical memory size to set the MTRRs. +- After training, add UMAMemTyping function to reconfigure the MTRRs based on +- NV_BottomUMA (for UMA systems only). +- This two-step process allows all memory to be cached for training +-*/ ++ /* UMA memory size may need splitting the MTRR configuration into two ++ * Before training use NB_BottomIO or the physical memory size to set the MTRRs. ++ * After training, add UMAMemTyping function to reconfigure the MTRRs based on ++ * NV_BottomUMA (for UMA systems only). ++ * This two-step process allows all memory to be cached for training ++ */ ++ + u32 Bottom32bIO, Cache32bTOP; + u32 val; + u32 addr; +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctndi_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mctndi_d.c +index 013a1b9..6f97061 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mctndi_d.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctndi_d.c +@@ -2,6 +2,7 @@ + * This file is part of the coreboot project. + * + * Copyright (C) 2010 Advanced Micro Devices, Inc. 
++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -140,7 +141,7 @@ void InterleaveNodes_D(struct MCTStatStruc *pMCTstat, + } + + if (DoIntlv) { +- MCTMemClr_D(pMCTstat,pDCTstatA); ++ MCTMemClr_D(pMCTstat, pDCTstatA); + /* Program Interleaving enabled on Node 0 map only.*/ + MemSize0 <<= bsf(Nodes); /* MemSize=MemSize*2 (or 4, or 8) */ + Dct0MemSize <<= bsf(Nodes); +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctproc.c b/src/northbridge/amd/amdmct/mct_ddr3/mctproc.c +index da2f372..cda9c6b 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mctproc.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctproc.c +@@ -2,6 +2,7 @@ + * This file is part of the coreboot project. + * + * Copyright (C) 2010 Advanced Micro Devices, Inc. ++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -36,10 +37,10 @@ u32 mct_SetDramConfigMisc2(struct DCTStatStruc *pDCTstat, u8 dct, u32 misc2) + val = Get_NB32(pDCTstat->dev_dct, dct * 0x100 + 0x78); + + val &= 7; +- val = ((~val) & 0xFF) + 1; ++ val = ((~val) & 0xff) + 1; + val += 6; +- val &= 0xFF; +- misc2 &= 0xFFF8FFFF; ++ val &= 0x7; ++ misc2 &= 0xfff8ffff; + misc2 |= val << 16; /* DataTxFifoWrDly */ + if (pDCTstat->LogicalCPUID & AMD_DR_Dx) + misc2 |= 1 << 7; /* ProgOdtEn */ +@@ -52,11 +53,15 @@ void mct_ExtMCTConfig_Cx(struct DCTStatStruc *pDCTstat) + u32 val; + + if (pDCTstat->LogicalCPUID & (AMD_DR_Cx)) { +- Set_NB32(pDCTstat->dev_dct, 0x11C, 0x0CE00FC0 | 1 << 29/* FlushWrOnStpGnt */); ++ /* Revision C */ ++ Set_NB32(pDCTstat->dev_dct, 0x11c, 0x0ce00fc0 | 1 << 29/* FlushWrOnStpGnt */); ++ } + +- val = Get_NB32(pDCTstat->dev_dct, 0x1B0); +- val 
&= 0xFFFFF8C0; ++ if (pDCTstat->LogicalCPUID & (AMD_DR_Cx)) { ++ val = Get_NB32(pDCTstat->dev_dct, 0x1b0); ++ val &= ~0x73f; + val |= 0x101; /* BKDG recommended settings */ +- Set_NB32(pDCTstat->dev_dct, 0x1B0, val); ++ ++ Set_NB32(pDCTstat->dev_dct, 0x1b0, val); + } + } +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c b/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c +index 6de2f4e..b21b96a 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c +@@ -2,6 +2,7 @@ + * This file is part of the coreboot project. + * + * Copyright (C) 2010 Advanced Micro Devices, Inc. ++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -172,6 +173,7 @@ static u32 mct_MR1(struct MCTStatStruc *pMCTstat, + ret |= 1 << 11; + } + ++ /* program MrsAddress[12]=QOFF: based on F2x[1,0]84[Qoff] */ + if (dword & (1 << 13)) + ret |= 1 << 12; + +@@ -199,7 +201,8 @@ static u32 mct_MR0(struct MCTStatStruc *pMCTstat, + /* program MrsAddress[6:4,2]=read CAS latency + (CL):based on F2x[1,0]88[Tcl] */ + dword2 = Get_NB32(dev, reg_off + 0x88); +- ret |= (dword2 & 0xF) << 4; /* F2x88[3:0] to MrsAddress[6:4,2]=xxx0b */ ++ ret |= (dword2 & 0x7) << 4; /* F2x88[2:0] to MrsAddress[6:4] */ ++ ret |= ((dword2 & 0x8) >> 3) << 2; /* F2x88[3] to MrsAddress[2] */ + + /* program MrsAddress[12]=0 (PPD):slow exit */ + if (dword & (1 << 23)) +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c +index 8e5c268..587c414 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c +@@ -2,6 +2,7 @@ + * This file is part of the coreboot project. + * + * Copyright (C) 2010 Advanced Micro Devices, Inc. 
++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -24,25 +25,13 @@ + + static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 Pass); +-static u8 mct_SavePassRcvEnDly_D(struct DCTStatStruc *pDCTstat, +- u8 rcvrEnDly, u8 Channel, +- u8 receiver, u8 Pass); +-static u8 mct_CompareTestPatternQW0_D(struct MCTStatStruc *pMCTstat, +- struct DCTStatStruc *pDCTstat, +- u32 addr, u8 channel, +- u8 pattern, u8 Pass); + static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat); + static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 Channel); + static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 Channel); +-static void mct_SetFinalRcvrEnDly_D(struct DCTStatStruc *pDCTstat, +- u8 RcvrEnDly, u8 where, +- u8 Channel, u8 Receiver, +- u32 dev, u32 index_reg, +- u8 Addl_Index, u8 Pass); +-static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u8 DQSRcvEnDly); ++static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u16 DQSRcvEnDly); + static void fenceDynTraining_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct); + static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat); +@@ -50,17 +39,17 @@ static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat); + /* Warning: These must be located so they do not cross a logical 16-bit + segment boundary! 
*/ + static const u32 TestPattern0_D[] = { +- 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, +- 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, +- 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, +- 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, +-}; +-static const u32 TestPattern1_D[] = { + 0x55555555, 0x55555555, 0x55555555, 0x55555555, + 0x55555555, 0x55555555, 0x55555555, 0x55555555, + 0x55555555, 0x55555555, 0x55555555, 0x55555555, + 0x55555555, 0x55555555, 0x55555555, 0x55555555, + }; ++static const u32 TestPattern1_D[] = { ++ 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, ++ 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, ++ 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, ++ 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, ++}; + static const u32 TestPattern2_D[] = { + 0x12345678, 0x87654321, 0x23456789, 0x98765432, + 0x59385824, 0x30496724, 0x24490795, 0x99938733, +@@ -104,16 +93,87 @@ void mct_TrainRcvrEn_D(struct MCTStatStruc *pMCTstat, + dqsTrainRcvrEn_SW(pMCTstat, pDCTstat, Pass); + } + ++static void read_dqs_write_timing_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dimm, uint32_t index_reg) ++{ ++ uint8_t lane; ++ uint32_t dword; ++ ++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) { ++ uint32_t wdt_reg; ++ if ((lane == 0) || (lane == 1)) ++ wdt_reg = 0x30; ++ if ((lane == 2) || (lane == 3)) ++ wdt_reg = 0x31; ++ if ((lane == 4) || (lane == 5)) ++ wdt_reg = 0x40; ++ if ((lane == 6) || (lane == 7)) ++ wdt_reg = 0x41; ++ if (lane == 8) ++ wdt_reg = 0x32; ++ wdt_reg += dimm * 3; ++ dword = Get_NB32_index_wait(dev, index_reg, wdt_reg); ++ if ((lane == 7) || (lane == 5) || (lane == 3) || (lane == 1)) ++ current_total_delay[lane] = (dword & 0x00ff0000) >> 16; ++ if ((lane == 8) || (lane == 6) || (lane == 4) || (lane == 2) || (lane == 0)) ++ current_total_delay[lane] = dword & 0x000000ff; ++ } ++} ++ ++static void write_dqs_receiver_enable_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dimm, uint32_t 
index_reg) ++{ ++ uint8_t lane; ++ uint32_t dword; ++ ++ for (lane = 0; lane < 8; lane++) { ++ uint32_t ret_reg; ++ if ((lane == 0) || (lane == 1)) ++ ret_reg = 0x10; ++ if ((lane == 2) || (lane == 3)) ++ ret_reg = 0x11; ++ if ((lane == 4) || (lane == 5)) ++ ret_reg = 0x20; ++ if ((lane == 6) || (lane == 7)) ++ ret_reg = 0x21; ++ ret_reg += dimm * 3; ++ dword = Get_NB32_index_wait(dev, index_reg, ret_reg); ++ if ((lane == 7) || (lane == 5) || (lane == 3) || (lane == 1)) { ++ dword &= ~(0x1ff << 16); ++ dword |= (current_total_delay[lane] & 0x1ff) << 16; ++ } ++ if ((lane == 6) || (lane == 4) || (lane == 2) || (lane == 0)) { ++ dword &= ~0x1ff; ++ dword |= current_total_delay[lane] & 0x1ff; ++ } ++ Set_NB32_index_wait(dev, index_reg, ret_reg, dword); ++ } ++} ++ ++static uint32_t convert_testaddr_and_channel_to_address(struct DCTStatStruc *pDCTstat, uint32_t testaddr, uint8_t channel) ++{ ++ SetUpperFSbase(testaddr); ++ testaddr <<= 8; ++ ++ if((pDCTstat->Status & (1<<SB_128bitmode)) && channel ) { ++ testaddr += 8; /* second channel */ ++ } ++ ++ return testaddr; ++} ++ ++/* DQS Receiver Enable Training ++ * Algorithm detailed in the Fam10h BKDG Rev. 3.62 section 2.8.9.9.2 ++ */ + static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 Pass) + { +- u8 Channel, RcvrEnDly, RcvrEnDlyRmin; +- u8 Test0, Test1, CurrTest, CurrTestSide0, CurrTestSide1; +- u8 CTLRMaxDelay, _2Ranks, PatternA, PatternB; ++ u8 Channel; ++ u8 _2Ranks; + u8 Addl_Index = 0; + u8 Receiver; + u8 _DisableDramECC = 0, _Wrap32Dis = 0, _SSE2 = 0; +- u8 RcvrEnDlyLimit, Final_Value, MaxDelay_CH[2]; ++ u8 Final_Value; ++ u16 CTLRMaxDelay; ++ u16 MaxDelay_CH[2]; + u32 TestAddr0, TestAddr1, TestAddr0B, TestAddr1B; + u32 PatternBuffer[64+4]; /* FIXME: need increase 8? 
*/ + u32 Errors; +@@ -127,9 +187,20 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, + u32 cr4; + u32 lo, hi; + ++ uint32_t dword; ++ uint8_t rank; ++ uint8_t lane; ++ uint16_t current_total_delay[MAX_BYTE_LANES]; ++ uint16_t candidate_total_delay[8]; ++ uint8_t data_test_pass_sr[2][8]; /* [rank][lane] */ ++ uint8_t data_test_pass[8]; /* [lane] */ ++ uint8_t data_test_pass_prev[8]; /* [lane] */ ++ uint8_t window_det_toggle[8]; ++ uint8_t trained[8]; ++ uint64_t result_qword1; ++ uint64_t result_qword2; ++ + u8 valid; +- u32 tmp; +- u8 LastTest; + + print_debug_dqs("\nTrainRcvEn: Node", pDCTstat->Node_ID, 0); + print_debug_dqs("TrainRcvEn: Pass", Pass, 0); +@@ -181,33 +252,103 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, + + Errors = 0; + dev = pDCTstat->dev_dct; +- CTLRMaxDelay = 0; + + for (Channel = 0; Channel < 2; Channel++) { + print_debug_dqs("\tTrainRcvEn51: Node ", pDCTstat->Node_ID, 1); + print_debug_dqs("\tTrainRcvEn51: Channel ", Channel, 1); + pDCTstat->Channel = Channel; + ++ CTLRMaxDelay = 0; + MaxDelay_CH[Channel] = 0; + index_reg = 0x98 + 0x100 * Channel; + + Receiver = mct_InitReceiver_D(pDCTstat, Channel); +- /* There are four receiver pairs, loosely associated with chipselects. */ ++ /* There are four receiver pairs, loosely associated with chipselects. ++ * This is essentially looping over each DIMM. 
++ */ + for (; Receiver < 8; Receiver += 2) { + Addl_Index = (Receiver >> 1) * 3 + 0x10; +- LastTest = DQS_FAIL; +- +- /* mct_ModifyIndex_D */ +- RcvrEnDlyRmin = RcvrEnDlyLimit = 0xff; + + print_debug_dqs("\t\tTrainRcvEnd52: index ", Addl_Index, 2); + +- if(!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver)) { ++ if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver)) { + continue; + } + ++ /* Clear data structures */ ++ for (lane = 0; lane < 8; lane++) { ++ data_test_pass_prev[lane] = 0; ++ trained[lane] = 0; ++ } ++ ++ /* 2.8.9.9.2 (1, 6) ++ * Retrieve gross and fine timing fields from write DQS registers ++ */ ++ read_dqs_write_timing_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg); ++ ++ /* 2.8.9.9.2 (1) ++ * Program the Write Data Timing and Write ECC Timing register to ++ * the values stored in the DQS Write Timing Control register ++ * for each lane ++ */ ++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) { ++ uint32_t wdt_reg; ++ ++ /* Calculate Write Data Timing register location */ ++ if ((lane == 0) || (lane == 1) || (lane == 2) || (lane == 3)) ++ wdt_reg = 0x1; ++ if ((lane == 4) || (lane == 5) || (lane == 6) || (lane == 7)) ++ wdt_reg = 0x2; ++ if (lane == 8) ++ wdt_reg = 0x3; ++ wdt_reg |= ((Receiver / 2) << 8); ++ ++ /* Set Write Data Timing register values */ ++ dword = Get_NB32_index_wait(dev, index_reg, wdt_reg); ++ if ((lane == 7) || (lane == 3)) { ++ dword &= ~(0x7f << 24); ++ dword |= (current_total_delay[lane] & 0x7f) << 24; ++ } ++ if ((lane == 6) || (lane == 2)) { ++ dword &= ~(0x7f << 16); ++ dword |= (current_total_delay[lane] & 0x7f) << 16; ++ } ++ if ((lane == 5) || (lane == 1)) { ++ dword &= ~(0x7f << 8); ++ dword |= (current_total_delay[lane] & 0x7f) << 8; ++ } ++ if ((lane == 8) || (lane == 4) || (lane == 0)) { ++ dword &= ~0x7f; ++ dword |= current_total_delay[lane] & 0x7f; ++ } ++ Set_NB32_index_wait(dev, index_reg, wdt_reg, dword); ++ } ++ ++ /* 2.8.9.9.2 (2) ++ * Program the Read DQS 
Timing Control and the Read DQS ECC Timing Control registers ++ * to 1/2 MEMCLK for all lanes ++ */ ++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) { ++ uint32_t rdt_reg; ++ if ((lane == 0) || (lane == 1) || (lane == 2) || (lane == 3)) ++ rdt_reg = 0x5; ++ if ((lane == 4) || (lane == 5) || (lane == 6) || (lane == 7)) ++ rdt_reg = 0x6; ++ if (lane == 8) ++ rdt_reg = 0x7; ++ rdt_reg |= ((Receiver / 2) << 8); ++ if (lane == 8) ++ dword = 0x0000003f; ++ else ++ dword = 0x3f3f3f3f; ++ Set_NB32_index_wait(dev, index_reg, rdt_reg, dword); ++ } ++ ++ /* 2.8.9.9.2 (3) ++ * Select two test addresses for each rank present ++ */ + TestAddr0 = mct_GetRcvrSysAddr_D(pMCTstat, pDCTstat, Channel, Receiver, &valid); +- if(!valid) { /* Address not supported on current CS */ ++ if (!valid) { /* Address not supported on current CS */ + continue; + } + +@@ -229,171 +370,215 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, + print_debug_dqs("\t\tTrainRcvEn53: TestAddr1 ", TestAddr1, 2); + print_debug_dqs("\t\tTrainRcvEn53: TestAddr1B ", TestAddr1B, 2); + +- /* +- * Get starting RcvrEnDly value ++ /* 2.8.9.9.2 (4, 5) ++ * Write 1 cache line of the appropriate test pattern to each test addresse + */ +- RcvrEnDly = mct_Get_Start_RcvrEnDly_1Pass(Pass); ++ mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0, 0); /* rank 0 of DIMM, testpattern 0 */ ++ mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0B, 1); /* rank 0 of DIMM, testpattern 1 */ ++ if (_2Ranks) { ++ mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1, 0); /*rank 1 of DIMM, testpattern 0 */ ++ mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1B, 1); /*rank 1 of DIMM, testpattern 1 */ ++ } + +- /* mct_GetInitFlag_D*/ +- if (Pass == FirstPass) { +- pDCTstat->DqsRcvEn_Pass = 0; +- } else { +- pDCTstat->DqsRcvEn_Pass=0xFF; ++#if DQS_TRAIN_DEBUG > 0 ++ for (lane = 0; lane < 8; lane++) { ++ print_debug_dqs("\t\tTrainRcvEn54: lane: ", lane, 2); ++ print_debug_dqs("\t\tTrainRcvEn54: current_total_delay ", 
current_total_delay[lane], 2); + } +- pDCTstat->DqsRcvEn_Saved = 0; ++#endif + ++ /* 2.8.9.9.2 (6) ++ * Write gross and fine timing fields to read DQS registers ++ */ ++ write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg); ++ ++ /* 2.8.9.9.2 (7) ++ * Loop over all delay values up to 1 MEMCLK (0x40 delay steps) from the initial delay values ++ * ++ * FIXME ++ * It is not clear if training should be discontinued if any test failures occur in the first ++ * 1 MEMCLK window, or if it should be discontinued if no successes occur in the first 1 MEMCLK ++ * window. Therefore, loop over up to 2 MEMCLK (0x80 delay steps) to be on the safe side. ++ */ ++ uint16_t current_delay_step; + +- while(RcvrEnDly < RcvrEnDlyLimit) { /* sweep Delay value here */ +- print_debug_dqs("\t\t\tTrainRcvEn541: RcvrEnDly ", RcvrEnDly, 3); ++ for (current_delay_step = 0; current_delay_step < 0x80; current_delay_step++) { ++ print_debug_dqs("\t\t\tTrainRcvEn541: current_delay_step ", current_delay_step, 3); + +- /* callback not required +- if(mct_AdjustDelay_D(pDCTstat, RcvrEnDly)) +- goto skipDly; ++ /* 2.8.9.9.2 (7 D) ++ * Terminate if all lanes are trained + */ ++ uint8_t all_lanes_trained = 1; ++ for (lane = 0; lane < 8; lane++) ++ if (!trained[lane]) ++ all_lanes_trained = 0; + +- /* Odd steps get another pattern such that even +- and odd steps alternate. The pointers to the +- patterns will be swaped at the end of the loop +- so that they correspond. 
*/ +- if(RcvrEnDly & 1) { +- PatternA = 1; +- PatternB = 0; +- } else { +- /* Even step */ +- PatternA = 0; +- PatternB = 1; +- } +- +- mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0, PatternA); /* rank 0 of DIMM, testpattern 0 */ +- mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0B, PatternB); /* rank 0 of DIMM, testpattern 1 */ +- if(_2Ranks) { +- mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1, PatternA); /*rank 1 of DIMM, testpattern 0 */ +- mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1B, PatternB); /*rank 1 of DIMM, testpattern 1 */ +- } +- +- mct_SetRcvrEnDly_D(pDCTstat, RcvrEnDly, 0, Channel, Receiver, dev, index_reg, Addl_Index, Pass); +- +- CurrTest = DQS_FAIL; +- CurrTestSide0 = DQS_FAIL; +- CurrTestSide1 = DQS_FAIL; +- +- mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0); /*cache fills */ +- Test0 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr0, Channel, PatternA, Pass);/* ROM vs cache compare */ +- proc_IOCLFLUSH_D(TestAddr0); +- ResetDCTWrPtr_D(dev, index_reg, Addl_Index); +- +- print_debug_dqs("\t\t\tTrainRcvEn542: Test0 result ", Test0, 3); +- +- /* != 0x00 mean pass */ +- +- if(Test0 == DQS_PASS) { +- mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0B); /*cache fills */ +- /* ROM vs cache compare */ +- Test1 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr0B, Channel, PatternB, Pass); +- proc_IOCLFLUSH_D(TestAddr0B); +- ResetDCTWrPtr_D(dev, index_reg, Addl_Index); +- +- print_debug_dqs("\t\t\tTrainRcvEn543: Test1 result ", Test1, 3); ++ if (all_lanes_trained) ++ break; + +- if(Test1 == DQS_PASS) { +- CurrTestSide0 = DQS_PASS; ++ /* 2.8.9.9.2 (7 A) ++ * Loop over all ranks ++ */ ++ for (rank = 0; rank < (_2Ranks + 1); rank++) { ++ /* 2.8.9.9.2 (7 A a-d) ++ * Read the first test address of the current rank ++ * Store the first data beat for analysis ++ * Reset read pointer in the DRAM controller FIFO ++ * Read the second test address of the current rank ++ * Store the first data beat for 
analysis ++ * Reset read pointer in the DRAM controller FIFO ++ */ ++ if (rank & 1) { ++ /* 2.8.9.9.2 (7 D) ++ * Invert read instructions to alternate data read order on the bus ++ */ ++ proc_IOCLFLUSH_D((rank == 0)?TestAddr0B:TestAddr1B); ++ result_qword2 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, (rank == 0)?TestAddr0B:TestAddr1B, Channel)); ++ write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg); ++ proc_IOCLFLUSH_D((rank == 0)?TestAddr0:TestAddr1); ++ result_qword1 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, (rank == 0)?TestAddr0:TestAddr1, Channel)); ++ write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg); ++ } else { ++ proc_IOCLFLUSH_D((rank == 0)?TestAddr0:TestAddr1); ++ result_qword1 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, (rank == 0)?TestAddr0:TestAddr1, Channel)); ++ write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg); ++ proc_IOCLFLUSH_D((rank == 0)?TestAddr0B:TestAddr1B); ++ result_qword2 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, (rank == 0)?TestAddr0B:TestAddr1B, Channel)); ++ write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg); + } +- } +- if(_2Ranks) { +- mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1); /*cache fills */ +- /* ROM vs cache compare */ +- Test0 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr1, Channel, PatternA, Pass); +- proc_IOCLFLUSH_D(TestAddr1); +- ResetDCTWrPtr_D(dev, index_reg, Addl_Index); +- +- print_debug_dqs("\t\t\tTrainRcvEn544: Test0 result ", Test0, 3); +- +- if(Test0 == DQS_PASS) { +- mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1B); /*cache fills */ +- /* ROM vs cache compare */ +- Test1 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr1B, Channel, PatternB, Pass); +- proc_IOCLFLUSH_D(TestAddr1B); +- ResetDCTWrPtr_D(dev, 
index_reg, Addl_Index); +- +- print_debug_dqs("\t\t\tTrainRcvEn545: Test1 result ", Test1, 3); +- if(Test1 == DQS_PASS) { +- CurrTestSide1 = DQS_PASS; ++ /* 2.8.9.9.2 (7 A e) ++ * Compare both read patterns and flag passing ranks/lanes ++ */ ++ uint8_t result_lane_byte1; ++ uint8_t result_lane_byte2; ++ for (lane = 0; lane < 8; lane++) { ++ if (trained[lane] == 1) { ++#if DQS_TRAIN_DEBUG > 0 ++ print_debug_dqs("\t\t\t\t\t\t\t\t lane already trained: ", lane, 4); ++#endif ++ continue; + } ++ ++ result_lane_byte1 = (result_qword1 >> (lane * 8)) & 0xff; ++ result_lane_byte2 = (result_qword2 >> (lane * 8)) & 0xff; ++ if ((result_lane_byte1 == 0x55) && (result_lane_byte2 == 0xaa)) ++ data_test_pass_sr[rank][lane] = 1; ++ else ++ data_test_pass_sr[rank][lane] = 0; ++#if DQS_TRAIN_DEBUG > 0 ++ print_debug_dqs_pair("\t\t\t\t\t\t\t\t ", 0x55, " | ", result_lane_byte1, 4); ++ print_debug_dqs_pair("\t\t\t\t\t\t\t\t ", 0xaa, " | ", result_lane_byte2, 4); ++#endif ++ + } + } + +- if(_2Ranks) { +- if ((CurrTestSide0 == DQS_PASS) && (CurrTestSide1 == DQS_PASS)) { +- CurrTest = DQS_PASS; ++ /* 2.8.9.9.2 (7 B) ++ * If DIMM is dual rank, only use delays that pass testing for both ranks ++ */ ++ for (lane = 0; lane < 8; lane++) { ++ if (_2Ranks) { ++ if ((data_test_pass_sr[0][lane]) && (data_test_pass_sr[1][lane])) ++ data_test_pass[lane] = 1; ++ else ++ data_test_pass[lane] = 0; ++ } else { ++ data_test_pass[lane] = data_test_pass_sr[0][lane]; + } +- } else if (CurrTestSide0 == DQS_PASS) { +- CurrTest = DQS_PASS; + } + +- /* record first pass DqsRcvEn to stack */ +- valid = mct_SavePassRcvEnDly_D(pDCTstat, RcvrEnDly, Channel, Receiver, Pass); ++ /* 2.8.9.9.2 (7 E) ++ * For each lane, update the DQS receiver delay setting in support of next iteration ++ */ ++ for (lane = 0; lane < 8; lane++) { ++ if (trained[lane] == 1) ++ continue; ++ ++ /* 2.8.9.9.2 (7 C a) ++ * Save the total delay of the first success after a failure for later use ++ */ ++ if ((data_test_pass[lane] == 1) && 
(data_test_pass_prev[lane] == 0)) { ++ candidate_total_delay[lane] = current_total_delay[lane]; ++ window_det_toggle[lane] = 0; ++ } + +- /* Break(1:RevF,2:DR) or not(0) FIXME: This comment deosn't make sense */ +- if(valid == 2 || (LastTest == DQS_FAIL && valid == 1)) { +- RcvrEnDlyRmin = RcvrEnDly; +- break; ++ /* 2.8.9.9.2 (7 C b) ++ * If the current delay failed testing add 1/8 UI to the current delay ++ */ ++ if (data_test_pass[lane] == 0) ++ current_total_delay[lane] += 0x4; ++ ++ /* 2.8.9.9.2 (7 C c) ++ * If the current delay passed testing alternately add either 1/32 UI or 1/4 UI to the current delay ++ * If 1.25 UI of delay have been added with no failures the lane is considered trained ++ */ ++ if (data_test_pass[lane] == 1) { ++ /* See if lane is trained */ ++ if ((current_total_delay[lane] - candidate_total_delay[lane]) >= 0x28) { ++ trained[lane] = 1; ++ ++ /* Calculate and set final lane delay value ++ * The final delay is the candidate delay + 7/8 UI ++ */ ++ current_total_delay[lane] = candidate_total_delay[lane] + 0x1c; ++ } else { ++ if (window_det_toggle[lane] == 0) { ++ current_total_delay[lane] += 0x1; ++ window_det_toggle[lane] = 1; ++ } else { ++ current_total_delay[lane] += 0x8; ++ window_det_toggle[lane] = 0; ++ } ++ } ++ } + } + +- LastTest = CurrTest; +- +- /* swap the rank 0 pointers */ +- tmp = TestAddr0; +- TestAddr0 = TestAddr0B; +- TestAddr0B = tmp; +- +- /* swap the rank 1 pointers */ +- tmp = TestAddr1; +- TestAddr1 = TestAddr1B; +- TestAddr1B = tmp; +- +- print_debug_dqs("\t\t\tTrainRcvEn56: RcvrEnDly ", RcvrEnDly, 3); ++ /* Update delays in hardware */ ++ write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg); + +- RcvrEnDly++; +- +- } /* while RcvrEnDly */ +- +- print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDly ", RcvrEnDly, 2); +- print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDlyRmin ", RcvrEnDlyRmin, 3); +- print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDlyLimit ", RcvrEnDlyLimit, 3); +- 
if(RcvrEnDlyRmin == RcvrEnDlyLimit) { +- /* no passing window */ +- pDCTstat->ErrStatus |= 1 << SB_NORCVREN; +- Errors |= 1 << SB_NORCVREN; +- pDCTstat->ErrCode = SC_FatalErr; ++ /* Save previous results for comparison in the next iteration */ ++ for (lane = 0; lane < 8; lane++) ++ data_test_pass_prev[lane] = data_test_pass[lane]; + } + +- if(RcvrEnDly > (RcvrEnDlyLimit - 1)) { +- /* passing window too narrow, too far delayed*/ +- pDCTstat->ErrStatus |= 1 << SB_SmallRCVR; +- Errors |= 1 << SB_SmallRCVR; +- pDCTstat->ErrCode = SC_FatalErr; +- RcvrEnDly = RcvrEnDlyLimit - 1; +- pDCTstat->CSTrainFail |= 1 << Receiver; +- pDCTstat->DimmTrainFail |= 1 << (Receiver + Channel); +- } +- +- /* CHB_D0_B0_RCVRDLY set in mct_Average_RcvrEnDly_Pass */ +- mct_Average_RcvrEnDly_Pass(pDCTstat, RcvrEnDly, RcvrEnDlyLimit, Channel, Receiver, Pass); +- +- mct_SetFinalRcvrEnDly_D(pDCTstat, RcvrEnDly, Final_Value, Channel, Receiver, dev, index_reg, Addl_Index, Pass); ++#if DQS_TRAIN_DEBUG > 0 ++ for (lane = 0; lane < 8; lane++) ++ print_debug_dqs_pair("\t\tTrainRcvEn55: Lane ", lane, " current_total_delay ", current_total_delay[lane], 2); ++#endif + +- if(pDCTstat->ErrStatus & (1 << SB_SmallRCVR)) { +- Errors |= 1 << SB_SmallRCVR; +- } ++ /* Find highest delay value and save for later use */ ++ for (lane = 0; lane < 8; lane++) ++ if (current_total_delay[lane] > CTLRMaxDelay) ++ CTLRMaxDelay = current_total_delay[lane]; + +- RcvrEnDly += Pass1MemClkDly; +- if(RcvrEnDly > CTLRMaxDelay) { +- CTLRMaxDelay = RcvrEnDly; ++ /* See if any lanes failed training, and set error flags appropriately ++ * For all trained lanes, save delay values for later use ++ */ ++ for (lane = 0; lane < 8; lane++) { ++ if (trained[lane]) { ++ pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver >> 1][lane] = current_total_delay[lane]; ++ } else { ++ printk(BIOS_WARNING, "TrainRcvrEn: WARNING: Lane %d of receiver %d on channel %d failed training!\n", lane, Receiver, Channel); ++ ++ /* Set error flags */ ++ 
pDCTstat->ErrStatus |= 1 << SB_NORCVREN; ++ Errors |= 1 << SB_NORCVREN; ++ pDCTstat->ErrCode = SC_FatalErr; ++ pDCTstat->CSTrainFail |= 1 << Receiver; ++ pDCTstat->DimmTrainFail |= 1 << (Receiver + Channel); ++ } + } + +- } /* while Receiver */ ++ /* 2.8.9.9.2 (8) ++ * Flush the receiver FIFO ++ * Write one full cache line of non-0x55/0xaa data to one of the test addresses, then read it back to flush the FIFO ++ */ ++ ++ WriteLNTestPattern(TestAddr0 << 8, (uint8_t *)TestPattern2_D, 1); ++ mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0); ++ } + MaxDelay_CH[Channel] = CTLRMaxDelay; +- } /* for Channel */ ++ } + + CTLRMaxDelay = MaxDelay_CH[0]; + if (MaxDelay_CH[1] > CTLRMaxDelay) +@@ -428,31 +613,31 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, + + #if DQS_TRAIN_DEBUG > 0 + { +- u8 Channel; ++ u8 ChannelDTD; + printk(BIOS_DEBUG, "TrainRcvrEn: CH_MaxRdLat:\n"); +- for(Channel = 0; Channel<2; Channel++) { ++ for(ChannelDTD = 0; ChannelDTD<2; ChannelDTD++) { + printk(BIOS_DEBUG, "Channel:%x: %x\n", +- Channel, pDCTstat->CH_MaxRdLat[Channel]); ++ ChannelDTD, pDCTstat->CH_MaxRdLat[ChannelDTD]); + } + } + #endif + + #if DQS_TRAIN_DEBUG > 0 + { +- u8 val; +- u8 Channel, Receiver; ++ u16 valDTD; ++ u8 ChannelDTD, ReceiverDTD; + u8 i; +- u8 *p; ++ u16 *p; + + printk(BIOS_DEBUG, "TrainRcvrEn: CH_D_B_RCVRDLY:\n"); +- for(Channel = 0; Channel < 2; Channel++) { +- printk(BIOS_DEBUG, "Channel:%x\n", Channel); +- for(Receiver = 0; Receiver<8; Receiver+=2) { +- printk(BIOS_DEBUG, "\t\tReceiver:%x:", Receiver); +- p = pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver>>1]; ++ for(ChannelDTD = 0; ChannelDTD < 2; ChannelDTD++) { ++ printk(BIOS_DEBUG, "Channel:%x\n", ChannelDTD); ++ for(ReceiverDTD = 0; ReceiverDTD<8; ReceiverDTD+=2) { ++ printk(BIOS_DEBUG, "\t\tReceiver:%x:", ReceiverDTD); ++ p = pDCTstat->CH_D_B_RCVRDLY[ChannelDTD][ReceiverDTD>>1]; + for (i=0;i<8; i++) { +- val = p[i]; +- printk(BIOS_DEBUG, "%x ", val); ++ valDTD = p[i]; ++ printk(BIOS_DEBUG, " %03x", 
valDTD); + } + printk(BIOS_DEBUG, "\n"); + } +@@ -475,15 +660,6 @@ u8 mct_InitReceiver_D(struct DCTStatStruc *pDCTstat, u8 dct) + } + } + +-static void mct_SetFinalRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly, u8 where, u8 Channel, u8 Receiver, u32 dev, u32 index_reg, u8 Addl_Index, u8 Pass/*, u8 *p*/) +-{ +- /* +- * Program final DqsRcvEnDly to additional index for DQS receiver +- * enabled delay +- */ +- mct_SetRcvrEnDly_D(pDCTstat, RcvrEnDly, where, Channel, Receiver, dev, index_reg, Addl_Index, Pass); +-} +- + static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat) + { + u8 ch_end, ch; +@@ -514,17 +690,20 @@ static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat) + * Function only used once so it was inlined. + */ + +-void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly, ++/* Set F2x[1, 0]9C_x[2B:10] DRAM DQS Receiver Enable Timing Control Registers ++ * See BKDG Rev. 3.62 page 268 for more information ++ */ ++void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u16 RcvrEnDly, + u8 FinalValue, u8 Channel, u8 Receiver, u32 dev, + u32 index_reg, u8 Addl_Index, u8 Pass) + { + u32 index; + u8 i; +- u8 *p; ++ u16 *p; + u32 val; + +- if(RcvrEnDly == 0xFE) { +- /*set the boudary flag */ ++ if(RcvrEnDly == 0x1fe) { ++ /*set the boundary flag */ + pDCTstat->Status |= 1 << SB_DQSRcvLimit; + } + +@@ -543,27 +722,57 @@ void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly, + val = Get_NB32_index_wait(dev, index_reg, index); + if(i & 1) { + /* odd byte lane */ +- val &= ~(0xFF << 16); +- val |= (RcvrEnDly << 16); ++ val &= ~(0x1ff << 16); ++ val |= ((RcvrEnDly & 0x1ff) << 16); + } else { + /* even byte lane */ +- val &= ~0xFF; +- val |= RcvrEnDly; ++ val &= ~0x1ff; ++ val |= (RcvrEnDly & 0x1ff); + } + Set_NB32_index_wait(dev, index_reg, index, val); + } + + } + +-static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u8 DQSRcvEnDly) ++/* Calculate MaxRdLatency ++ * Algorithm detailed in the 
Fam10h BKDG Rev. 3.62 section 2.8.9.9.5 ++ */ ++static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u16 DQSRcvEnDly) + { + u32 dev; + u32 reg; +- u16 SubTotal; ++ u32 SubTotal; + u32 index_reg; + u32 reg_off; + u32 val; +- u32 valx; ++ ++ uint8_t cpu_val_n; ++ uint8_t cpu_val_p; ++ ++ u16 freq_tab[] = {400, 533, 667, 800}; ++ ++ /* Set up processor-dependent values */ ++ if (pDCTstat->LogicalCPUID & AMD_DR_Dx) { ++ /* Revision D and above */ ++ cpu_val_n = 4; ++ cpu_val_p = 29; ++ } else if (pDCTstat->LogicalCPUID & AMD_DR_Cx) { ++ /* Revision C */ ++ uint8_t package_type = mctGet_NVbits(NV_PACK_TYPE); ++ if ((package_type == PT_L1) /* Socket F (1207) */ ++ || (package_type == PT_M2) /* Socket AM3 */ ++ || (package_type == PT_S1)) { /* Socket S1g<x> */ ++ cpu_val_n = 10; ++ cpu_val_p = 11; ++ } else { ++ cpu_val_n = 4; ++ cpu_val_p = 29; ++ } ++ } else { ++ /* Revision B and below */ ++ cpu_val_n = 10; ++ cpu_val_p = 11; ++ } + + if(pDCTstat->GangedMode) + Channel = 0; +@@ -598,49 +807,32 @@ static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u8 DQ + val = Get_NB32(dev, 0x78 + reg_off); + SubTotal += 8 - (val & 0x0f); + +- /* Convert bits 7-5 (also referred to as the course delay) of ++ /* Convert bits 7-5 (also referred to as the coarse delay) of + * the current (or worst case) DQS receiver enable delay to + * 1/2 MEMCLKs units, rounding up, and add this to the sub-total. + */ +- SubTotal += DQSRcvEnDly >> 5; /*BOZO-no rounding up */ ++ SubTotal += DQSRcvEnDly >> 5; /* Retrieve gross delay portion of value */ + +- /* Add 5.5 to the sub-total. 5.5 represents part of the ++ /* Add "P" to the sub-total. "P" represents part of the + * processor specific constant delay value in the DRAM + * clock domain. 
+ */ + SubTotal <<= 1; /*scale 1/2 MemClk to 1/4 MemClk */ +- SubTotal += 11; /*add 5.5 1/2MemClk */ ++ SubTotal += cpu_val_p; /*add "P" 1/2MemClk */ ++ SubTotal >>= 1; /*scale 1/4 MemClk back to 1/2 MemClk */ + + /* Convert the sub-total (in 1/2 MEMCLKs) to northbridge +- * clocks (NCLKs) as follows (assuming DDR400 and assuming +- * that no P-state or link speed changes have occurred). ++ * clocks (NCLKs) + */ ++ SubTotal *= 200 * ((Get_NB32(pDCTstat->dev_nbmisc, 0xd4) & 0x1f) + 4); ++ SubTotal /= freq_tab[((Get_NB32(pDCTstat->dev_dct, 0x94 + reg_off) & 0x7) - 3)]; ++ SubTotal = (SubTotal + (2 - 1)) / 2; /* Round up */ + +- /* New formula: +- * SubTotal *= 3*(Fn2xD4[NBFid]+4)/(3+Fn2x94[MemClkFreq])/2 */ +- val = Get_NB32(dev, 0x94 + reg_off); +- +- /* SubTotal div 4 to scale 1/4 MemClk back to MemClk */ +- val &= 7; +- if (val >= 3) { +- val <<= 1; +- } else +- val += 3; +- valx = val << 2; +- +- val = Get_NB32(pDCTstat->dev_nbmisc, 0xD4); +- SubTotal *= ((val & 0x1f) + 4 ) * 3; +- +- SubTotal /= valx; +- if (SubTotal % valx) { /* round up */ +- SubTotal++; +- } +- +- /* Add 5 NCLKs to the sub-total. 5 represents part of the ++ /* Add "N" NCLKs to the sub-total. "N" represents part of the + * processor specific constant value in the northbridge + * clock domain. 
+ */ +- SubTotal += 5; ++ SubTotal += (cpu_val_n) / 2; + + pDCTstat->CH_MaxRdLat[Channel] = SubTotal; + if(pDCTstat->GangedMode) { +@@ -659,143 +851,6 @@ static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u8 DQ + Set_NB32(dev, reg, val); + } + +-static u8 mct_SavePassRcvEnDly_D(struct DCTStatStruc *pDCTstat, +- u8 rcvrEnDly, u8 Channel, +- u8 receiver, u8 Pass) +-{ +- u8 i; +- u8 mask_Saved, mask_Pass; +- u8 *p; +- +- /* calculate dimm offset +- * not needed for CH_D_B_RCVRDLY array +- */ +- +- /* cmp if there has new DqsRcvEnDly to be recorded */ +- mask_Pass = pDCTstat->DqsRcvEn_Pass; +- +- if(Pass == SecondPass) { +- mask_Pass = ~mask_Pass; +- } +- +- mask_Saved = pDCTstat->DqsRcvEn_Saved; +- if(mask_Pass != mask_Saved) { +- +- /* find desired stack offset according to channel/dimm/byte */ +- if(Pass == SecondPass) { +- /* FIXME: SecondPass is never used for Barcelona p = pDCTstat->CH_D_B_RCVRDLY_1[Channel][receiver>>1]; */ +- p = 0; /* Keep the compiler happy. */ +- } else { +- mask_Saved &= mask_Pass; +- p = pDCTstat->CH_D_B_RCVRDLY[Channel][receiver>>1]; +- } +- for(i=0; i < 8; i++) { +- /* cmp per byte lane */ +- if(mask_Pass & (1 << i)) { +- if(!(mask_Saved & (1 << i))) { +- /* save RcvEnDly to stack, according to +- the related Dimm/byte lane */ +- p[i] = (u8)rcvrEnDly; +- mask_Saved |= 1 << i; +- } +- } +- } +- pDCTstat->DqsRcvEn_Saved = mask_Saved; +- } +- return mct_SaveRcvEnDly_D_1Pass(pDCTstat, Pass); +-} +- +-static u8 mct_CompareTestPatternQW0_D(struct MCTStatStruc *pMCTstat, +- struct DCTStatStruc *pDCTstat, +- u32 addr, u8 channel, +- u8 pattern, u8 Pass) +-{ +- /* Compare only the first beat of data. Since target addrs are cache +- * line aligned, the Channel parameter is used to determine which +- * cache QW to compare. 
+- */ +- +- u8 *test_buf; +- u8 i; +- u8 result; +- u8 value; +- +- if(Pass == FirstPass) { +- if(pattern==1) { +- test_buf = (u8 *)TestPattern1_D; +- } else { +- test_buf = (u8 *)TestPattern0_D; +- } +- } else { /* Second Pass */ +- test_buf = (u8 *)TestPattern2_D; +- } +- +- SetUpperFSbase(addr); +- addr <<= 8; +- +- if((pDCTstat->Status & (1<<SB_128bitmode)) && channel ) { +- addr += 8; /* second channel */ +- test_buf += 8; +- } +- +- print_debug_dqs_pair("\t\t\t\t\t\t test_buf = ", (u32)test_buf, " | addr_lo = ", addr, 4); +- for (i=0; i<8; i++, addr ++) { +- value = read32_fs(addr); +- print_debug_dqs_pair("\t\t\t\t\t\t\t\t ", test_buf[i], " | ", value, 4); +- +- if (value == test_buf[i]) { +- pDCTstat->DqsRcvEn_Pass |= (1<<i); +- } else { +- pDCTstat->DqsRcvEn_Pass &= ~(1<<i); +- } +- } +- +- result = DQS_FAIL; +- +- if (Pass == FirstPass) { +- /* if first pass, at least one byte lane pass +- * ,then DQS_PASS=1 and will set to related reg. +- */ +- if(pDCTstat->DqsRcvEn_Pass != 0) { +- result = DQS_PASS; +- } else { +- result = DQS_FAIL; +- } +- +- } else { +- /* if second pass, at least one byte lane fail +- * ,then DQS_FAIL=1 and will set to related reg. +- */ +- if(pDCTstat->DqsRcvEn_Pass != 0xFF) { +- result = DQS_FAIL; +- } else { +- result = DQS_PASS; +- } +- } +- +- /* if second pass, we can't find the fail until FFh, +- * then let it fail to save the final delay +- */ +- if((Pass == SecondPass) && (pDCTstat->Status & (1 << SB_DQSRcvLimit))) { +- result = DQS_FAIL; +- pDCTstat->DqsRcvEn_Pass = 0; +- } +- +- /* second pass needs to be inverted +- * FIXME? this could be inverted in the above code to start with... 
+- */ +- if(Pass == SecondPass) { +- if (result == DQS_PASS) { +- result = DQS_FAIL; +- } else if (result == DQS_FAIL) { /* FIXME: doesn't need to be else if */ +- result = DQS_PASS; +- } +- } +- +- +- return result; +-} +- + static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat) + { +@@ -854,7 +909,7 @@ void SetEccDQSRcvrEn_D(struct DCTStatStruc *pDCTstat, u8 Channel) + u32 index_reg; + u32 index; + u8 ChipSel; +- u8 *p; ++ u16 *p; + u32 val; + + dev = pDCTstat->dev_dct; +@@ -884,7 +939,7 @@ static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat, + + for (ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) { + if(mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, ChipSel)) { +- u8 *p; ++ u16 *p; + p = pDCTstat->CH_D_B_RCVRDLY[Channel][ChipSel>>1]; + + /* DQS Delay Value of Data Bytelane +@@ -920,6 +975,10 @@ static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat, + SetEccDQSRcvrEn_D(pDCTstat, Channel); + } + ++/* 2.8.9.9.4 ++ * ECC Byte Lane Training ++ * DQS Receiver Enable Delay ++ */ + void mctSetEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstatA) + { +@@ -1017,7 +1076,9 @@ static void fenceDynTraining_D(struct MCTStatStruc *pMCTstat, + avRecValue -= 3; + else + */ +- if (pDCTstat->LogicalCPUID & AMD_DR_Cx) ++ if (pDCTstat->LogicalCPUID & AMD_DR_Dx) ++ avRecValue -= 8; ++ else if (pDCTstat->LogicalCPUID & AMD_DR_Cx) + avRecValue -= 8; + else if (pDCTstat->LogicalCPUID & AMD_DR_Bx) + avRecValue -= 8; +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc1p.c b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc1p.c +index c009756..f01e011 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc1p.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc1p.c +@@ -2,6 +2,7 @@ + * This file is part of the coreboot project. + * + * Copyright (C) 2010 Advanced Micro Devices, Inc. 
++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -36,17 +37,12 @@ u32 SetupDqsPattern_1PassB(u8 pass) + return (u32) TestPattern0_D; + } + +-u8 mct_Get_Start_RcvrEnDly_1Pass(u8 pass) +-{ +- return 0; +-} +- +-static u8 mct_Average_RcvrEnDly_1Pass(struct DCTStatStruc *pDCTstat, u8 Channel, u8 Receiver, ++static u16 mct_Average_RcvrEnDly_1Pass(struct DCTStatStruc *pDCTstat, u8 Channel, u8 Receiver, + u8 Pass) + { +- u8 i, MaxValue; +- u8 *p; +- u8 val; ++ u16 i, MaxValue; ++ u16 *p; ++ u16 val; + + MaxValue = 0; + p = pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver >> 1]; +@@ -76,8 +72,8 @@ u8 mct_SaveRcvEnDly_D_1Pass(struct DCTStatStruc *pDCTstat, u8 pass) + return ret; + } + +-u8 mct_Average_RcvrEnDly_Pass(struct DCTStatStruc *pDCTstat, +- u8 RcvrEnDly, u8 RcvrEnDlyLimit, ++u16 mct_Average_RcvrEnDly_Pass(struct DCTStatStruc *pDCTstat, ++ u16 RcvrEnDly, u16 RcvrEnDlyLimit, + u8 Channel, u8 Receiver, u8 Pass) + + { +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc2p.c b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc2p.c +index b01889d..796febc 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc2p.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc2p.c +@@ -2,6 +2,7 @@ + * This file is part of the coreboot project. + * + * Copyright (C) 2010 Advanced Micro Devices, Inc. 
++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -74,15 +75,15 @@ u8 mct_Get_Start_RcvrEnDly_Pass(struct DCTStatStruc *pDCTstat, + return RcvrEnDly; + } + +-u8 mct_Average_RcvrEnDly_Pass(struct DCTStatStruc *pDCTstat, +- u8 RcvrEnDly, u8 RcvrEnDlyLimit, ++u16 mct_Average_RcvrEnDly_Pass(struct DCTStatStruc *pDCTstat, ++ u16 RcvrEnDly, u16 RcvrEnDlyLimit, + u8 Channel, u8 Receiver, u8 Pass) + { + u8 i; +- u8 *p; +- u8 *p_1; +- u8 val; +- u8 val_1; ++ u16 *p; ++ u16 *p_1; ++ u16 val; ++ u16 val_1; + u8 valid = 1; + u8 bn; + +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mcttmrl.c b/src/northbridge/amd/amdmct/mct_ddr3/mcttmrl.c +index ea5c8c7..920f514 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mcttmrl.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mcttmrl.c +@@ -2,6 +2,7 @@ + * This file is part of the coreboot project. + * + * Copyright (C) 2010 Advanced Micro Devices, Inc. 
++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -191,10 +192,10 @@ static void maxRdLatencyTrain_D(struct MCTStatStruc *pMCTstat, + + #if DQS_TRAIN_DEBUG > 0 + { +- u8 Channel; ++ u8 ChannelDTD; + printk(BIOS_DEBUG, "maxRdLatencyTrain: CH_MaxRdLat:\n"); +- for(Channel = 0; Channel<2; Channel++) { +- printk(BIOS_DEBUG, "Channel: %02x: %02x\n", Channel, pDCTstat->CH_MaxRdLat[Channel]); ++ for(ChannelDTD = 0; ChannelDTD<2; ChannelDTD++) { ++ printk(BIOS_DEBUG, "Channel: %02x: %02x\n", ChannelDTD, pDCTstat->CH_MaxRdLat[ChannelDTD]); + } + } + #endif +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c b/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c +index cdeae49..1c3e322 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c +@@ -2,6 +2,7 @@ + * This file is part of the coreboot project. + * + * Copyright (C) 2010 Advanced Micro Devices, Inc. 
++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -58,9 +59,9 @@ void PrepareC_DCT(struct MCTStatStruc *pMCTstat, + pDCTstat->C_DCTPtr[dct]->LogicalCPUID = pDCTstat->LogicalCPUID; + + for (dimm = 0; dimm < MAX_DIMMS; dimm++) { +- if (DimmValid & (1 << dimm)) ++ if (DimmValid & (1 << (dimm << 1))) + pDCTstat->C_DCTPtr[dct]->DimmPresent[dimm] = 1; +- if (Dimmx8Present & (1 << dimm)) ++ if (Dimmx8Present & (1 << (dimm << 1))) + pDCTstat->C_DCTPtr[dct]->DimmX8Present[dimm] = 1; + } + +@@ -88,9 +89,9 @@ void PrepareC_DCT(struct MCTStatStruc *pMCTstat, + u8 DimmRanks; + if (DimmValid & (1 << (dimm << 1))) { + DimmRanks = 1; +- if (pDCTstat->DimmDRPresent & (1 << (dimm+dct))) ++ if (pDCTstat->DimmDRPresent & (1 << ((dimm << 1) + dct))) + DimmRanks = 2; +- else if (pDCTstat->DimmQRPresent & (1 << (dimm+dct))) ++ else if (pDCTstat->DimmQRPresent & (1 << ((dimm << 1) + dct))) + DimmRanks = 4; + } else + DimmRanks = 0; +@@ -249,35 +250,6 @@ static void ChangeMemClk(struct MCTStatStruc *pMCTstat, + } + } + +-/* Multiply the previously saved delay values in Pass 1, step #5 by +- (target frequency)/400 to find the gross and fine delay initialization +- values at the target frequency. 
+- */ +-void MultiplyDelay(struct MCTStatStruc *pMCTstat, +- struct DCTStatStruc *pDCTstat, u8 dct) +-{ +- u16 index; +- u8 Multiplier; +- u8 gross, fine; +- u16 total; +- +- Multiplier = pDCTstat->TargetFreq; +- +- for (index=0; index < MAX_BYTE_LANES*MAX_LDIMMS; index ++) { +- gross = pDCTstat->C_DCTPtr[dct]->WLGrossDelay[index]; +- fine = pDCTstat->C_DCTPtr[dct]->WLFineDelay[index]; +- +- total = gross << 5 | fine; +- total *= Multiplier; +- if (total % 3) +- total = total / 3 + 1; +- else +- total = total / 3; +- pDCTstat->C_DCTPtr[dct]->WLGrossDelay[index] = (total & 0xFF) >> 5; +- pDCTstat->C_DCTPtr[dct]->WLFineDelay[index] = total & 0x1F; +- } +-} +- + /* + * the DRAM controller to bring the DRAMs out of self refresh mode. + */ +@@ -352,9 +324,9 @@ void SetTargetFreq(struct MCTStatStruc *pMCTstat, + + if (!DCT1Present) + pDCTstat->CSPresent = pDCTstat->CSPresent_DCT[0]; +- else if (pDCTstat->GangedMode) { ++ else if (pDCTstat->GangedMode) + pDCTstat->CSPresent = 0; +- } else ++ else + pDCTstat->CSPresent = pDCTstat->CSPresent_DCT[1]; + + FreqChgCtrlWrd(pMCTstat, pDCTstat); +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c +index 212a348..c76476b 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c +@@ -2,6 +2,7 @@ + * This file is part of the coreboot project. + * + * Copyright (C) 2010 Advanced Micro Devices, Inc. 
++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -235,6 +236,65 @@ u32 swapBankBits(sDCTStruct *pDCTData, u32 MRSValue) + return MRSValue; + } + ++static uint16_t unbuffered_dimm_nominal_termination_emrs(uint8_t number_of_dimms, uint8_t frequency_index, uint8_t rank_count, uint8_t rank) ++{ ++ uint16_t term; ++ ++ /* FIXME ++ * Mainboards need to be able to specify the maximum number of DIMMs installable per channel ++ * For now assume a maximum of 2 DIMMs per channel can be installed ++ */ ++ uint8_t MaxDimmsInstallable = 2; ++ ++ if (number_of_dimms == 1) { ++ if (MaxDimmsInstallable < 3) { ++ term = 0x04; /* Rtt_Nom=RZQ/4=60 Ohm */ ++ } else { ++ if (rank_count == 1) { ++ term = 0x04; /* Rtt_Nom=RZQ/4=60 Ohm */ ++ } else { ++ if (rank == 0) ++ term = 0x04; /* Rtt_Nom=RZQ/4=60 Ohm */ ++ else ++ term = 0x00; /* Rtt_Nom=OFF */ ++ } ++ } ++ } else { ++ if (frequency_index < 5) ++ term = 0x0044; /* Rtt_Nom=RZQ/6=40 Ohm */ ++ else ++ term = 0x0204; /* Rtt_Nom=RZQ/8=30 Ohm */ ++ } ++ ++ return term; ++} ++ ++static uint16_t unbuffered_dimm_dynamic_termination_emrs(uint8_t number_of_dimms, uint8_t frequency_index, uint8_t rank_count, uint8_t rank) ++{ ++ uint16_t term; ++ ++ /* FIXME ++ * Mainboards need to be able to specify the maximum number of DIMMs installable per channel ++ * For now assume a maximum of 2 DIMMs per channel can be installed ++ */ ++ uint8_t MaxDimmsInstallable = 2; ++ ++ if (number_of_dimms == 1) { ++ if (MaxDimmsInstallable < 3) { ++ term = 0x00; /* Rtt_WR=off */ ++ } else { ++ if (rank_count == 1) ++ term = 0x00; /* Rtt_WR=off */ ++ else ++ term = 0x200; /* Rtt_WR=RZQ/4=60 Ohm */ ++ } ++ } else { ++ term = 0x400; /* Rtt_WR=RZQ/2=120 Ohm */ ++ } ++ ++ return term; ++} ++ + 
/*----------------------------------------------------------------------------- + * void prepareDimms(sMCTStruct *pMCTData, sDCTStruct *DCTData, u8 Dimm, BOOL WL) + * +@@ -295,48 +355,23 @@ void prepareDimms(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm, BOOL wl) + if (pDCTData->Status[DCT_STATUS_REGISTERED]) { + tempW1 = RttNomTargetRegDimm(pMCTData, pDCTData, dimm, wl, MemClkFreq, rank); + } else { +- if (wl) +- { +- if (pDCTData->MaxDimmsInstalled == 1) +- { +- if ((pDCTData->DimmRanks[dimm] == 2) && (rank == 0)) +- { +- tempW1 = 0x00; /* Rtt_Nom=OFF */ +- } ++ if (wl) { ++ if (rank == 0) { ++ /* Get Rtt_WR for the current DIMM and rank */ ++ uint16_t dynamic_term = unbuffered_dimm_dynamic_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[currDimm], rank); ++ ++ /* Convert dynamic termination code to corresponding nominal termination code */ ++ if (dynamic_term == 0x200) ++ tempW1 = 0x04; ++ else if (dynamic_term == 0x400) ++ tempW1 = 0x40; + else +- { +- tempW1 = 0x04; /* Rtt_Nom=RZQ/4=60 Ohm */ +- } +- } +- else /* 2 Dimms or more per channel */ +- { +- if ((pDCTData->DimmRanks[dimm] == 2) && (rank == 1)) +- { +- tempW1 = 0x00; /* Rtt_Nom=OFF */ +- } +- else +- { +- if (MemClkFreq == 6) { +- tempW1 = 0x04; /* Rtt_Nom=RZQ/4=60 Ohm */ +- } else { +- tempW1 = 0x40;/* Rtt_Nom=RZQ/2=120 Ohm */ +- } +- } +- } +- } +- else { /* 1 or 4 Dimms per channel */ +- if ((pDCTData->MaxDimmsInstalled == 1) || (pDCTData->MaxDimmsInstalled == 4)) +- { +- tempW1 = 0x04; /* Rtt_Nom=RZQ/4=60 Ohm */ +- } +- else /* 2 or 3 Dimms per channel */ +- { +- if (MemClkFreq < 5) { +- tempW1 = 0x0044; /* Rtt_Nom=RZQ/6=40 Ohm */ +- } else { +- tempW1 = 0x0204; /* Rtt_Nom=RZQ/8=30 Ohm */ +- } ++ tempW1 = 0x0; ++ } else { ++ tempW1 = unbuffered_dimm_nominal_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[currDimm], rank); + } ++ } else { ++ tempW1 = unbuffered_dimm_nominal_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, 
pDCTData->DimmRanks[currDimm], rank); + } + } + tempW=tempW|tempW1; +@@ -353,20 +388,22 @@ void prepareDimms(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm, BOOL wl) + else + { + /* Disable the output drivers of all other ranks for +- * the target DIMM. */ ++ * the target DIMM. ++ */ + tempW = bitTestSet(tempW1, Qoff); + } + } +- /* program MrsAddress[5,1]=output driver impedance control (DIC): +- * based on F2x[1,0]84[DrvImpCtrl] */ ++ /* Program MrsAddress[5,1]=output driver impedance control (DIC): ++ * based on F2x[1,0]84[DrvImpCtrl] ++ */ + tempW1 = get_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, + FUN_DCT, DRAM_MRS_REGISTER, DrvImpCtrlStart, DrvImpCtrlEnd); +- if (bitTest(tempW1,1)) +- {tempW = bitTestSet(tempW, 5);} +- if (bitTest(tempW1,0)) +- {tempW = bitTestSet(tempW, 1);} ++ if (bitTest(tempW1, 1)) ++ tempW = bitTestSet(tempW, 5); ++ if (bitTest(tempW1, 0)) ++ tempW = bitTestSet(tempW, 1); + +- tempW = swapAddrBits_wl(pDCTData,tempW); ++ tempW = swapAddrBits_wl(pDCTData, tempW); + + set_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, FUN_DCT, + DRAM_INIT, MrsAddressStart, MrsAddressEnd, tempW); +@@ -404,29 +441,10 @@ void prepareDimms(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm, BOOL wl) + if ((pDCTData->LogicalCPUID & AMD_DR_Bx) && (pDCTData->Status[DCT_STATUS_REGISTERED])) + tempW+=0x8; + /* determine Rtt_WR for WL & Normal mode */ +- if (pDCTData->Status[DCT_STATUS_REGISTERED]) { ++ if (pDCTData->Status[DCT_STATUS_REGISTERED]) + tempW1 = RttWrRegDimm(pMCTData, pDCTData, dimm, wl, MemClkFreq, rank); +- } else { +- if (wl) +- { +- tempW1 = 0x00; /* Rtt_WR=off */ +- } +- else +- { +- if (pDCTData->MaxDimmsInstalled == 1) +- { +- tempW1 = 0x00; /* Rtt_WR=off */ +- } +- else +- { +- if (MemClkFreq == 6) { +- tempW1 = 0x200; /* Rtt_WR=RZQ/4=60 Ohm */ +- } else { +- tempW1 = 0x400; /* Rtt_WR=RZQ/2 */ +- } +- } +- } +- } ++ else ++ tempW1 = unbuffered_dimm_dynamic_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, 
pDCTData->DimmRanks[dimm], rank); + tempW=tempW|tempW1; + tempW = swapAddrBits_wl(pDCTData,tempW); + set_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, FUN_DCT, +@@ -483,38 +501,10 @@ void prepareDimms(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm, BOOL wl) + } + + /* determine Rtt_Nom for WL & Normal mode */ +- if (pDCTData->Status[DCT_STATUS_REGISTERED]) { ++ if (pDCTData->Status[DCT_STATUS_REGISTERED]) + tempW1 = RttNomNonTargetRegDimm(pMCTData, pDCTData, currDimm, wl, MemClkFreq, rank); +- } else { +- if (wl) +- { +- if ((pDCTData->DimmRanks[currDimm] == 2) && (rank == 1)) +- { +- tempW1 = 0x00; /* Rtt_Nom=OFF */ +- } +- else +- { +- if (MemClkFreq < 5) { +- tempW1 = 0x0044;/* Rtt_Nom=RZQ/6=40 Ohm */ +- } else { +- tempW1 = 0x0204;/* Rtt_Nom=RZQ/8=30 Ohm */ +- } +- } +- } +- else { /* 1 or 4 Dimms per channel */ +- if (pDCTData->MaxDimmsInstalled == 4) +- { +- tempW1 = 0x04; /* Rtt_Nom=RZQ/4=60 Ohm */ +- } +- else { /* 2 or 3 Dimms per channel */ +- if (MemClkFreq < 5) { +- tempW1 = 0x0044; /* Rtt_Nom=RZQ/6=40 Ohm */ +- } else { +- tempW1 = 0x0204; /* Rtt_Nom=RZQ/8=30 Ohm */ +- } +- } +- } +- } ++ else ++ tempW1 = unbuffered_dimm_nominal_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[currDimm], rank); + tempW=tempW|tempW1; + /* program MrsAddress[5,1]=output driver impedance control (DIC): + * based on F2x[1,0]84[DrvImpCtrl] */ +@@ -560,22 +550,10 @@ void prepareDimms(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm, BOOL wl) + if ((pDCTData->LogicalCPUID & AMD_DR_Bx) && (pDCTData->Status[DCT_STATUS_REGISTERED])) + tempW+=0x8; + /* determine Rtt_WR for WL & Normal mode */ +- if (pDCTData->Status[DCT_STATUS_REGISTERED]) { ++ if (pDCTData->Status[DCT_STATUS_REGISTERED]) + tempW1 = RttWrRegDimm(pMCTData, pDCTData, currDimm, wl, MemClkFreq, rank); +- } else { +- if (wl) +- { +- tempW1 = 0x00; /* Rtt_WR=off */ +- } +- else +- { +- if (MemClkFreq == 6) { +- tempW1 = 0x200; /* Rtt_WR=RZQ/4=60 Ohm */ +- } else { +- tempW1 = 
0x400; /* Rtt_WR=RZQ/2 */ +- } +- } +- } ++ else ++ tempW1 = unbuffered_dimm_dynamic_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[currDimm], rank); + tempW=tempW|tempW1; + tempW = swapAddrBits_wl(pDCTData,tempW); + set_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, FUN_DCT, +@@ -646,9 +624,14 @@ void programODT(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm) + */ + void procConifg(sMCTStruct *pMCTData,sDCTStruct *pDCTData, u8 dimm, u8 pass) + { +- u8 ByteLane, Seed_Gross, Seed_Fine; ++ u8 ByteLane, Seed_Gross, Seed_Fine, MemClkFreq; + u32 Value, Addr; + u16 Addl_Data_Offset, Addl_Data_Port; ++ u16 freq_tab[] = {400, 533, 667, 800}; ++ ++ /* MemClkFreq: 3: 400MHz; 4: 533MHz; 5: 667MHz; 6: 800MHz */ ++ MemClkFreq = get_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, ++ FUN_DCT, DRAM_CONFIG_HIGH, 0, 2); + + /* Program F2x[1, 0]9C_x08[WrLvOdt[3:0]] to the proper ODT settings for the + * current memory subsystem configuration. +@@ -656,12 +639,13 @@ void procConifg(sMCTStruct *pMCTData,sDCTStruct *pDCTData, u8 dimm, u8 pass) + programODT(pMCTData, pDCTData, dimm); + + /* Program F2x[1,0]9C_x08[WrLvOdtEn]=1 */ +- if (pDCTData->LogicalCPUID & (AMD_DR_Cx | AMD_DR_Dx)) ++ if (pDCTData->LogicalCPUID & (AMD_DR_Cx | AMD_DR_Dx)) { + set_DCT_ADDR_Bits(pDCTData, pDCTData->DctTrain, pDCTData->NodeId, FUN_DCT, + DRAM_ADD_DCT_PHY_CONTROL_REG, WrLvOdtEn, WrLvOdtEn, (u32)1); ++ } + else + { +- /* Program WrLvOdtEn=1 through set bit 12 of D3CSODT reg offset 0 for Rev.B*/ ++ /* Program WrLvOdtEn=1 through set bit 12 of D3CSODT reg offset 0 for Rev.B */ + if (pDCTData->DctTrain) + { + Addl_Data_Offset=0x198; +@@ -687,7 +671,6 @@ void procConifg(sMCTStruct *pMCTData,sDCTStruct *pDCTData, u8 dimm, u8 pass) + + /* Wait 10 MEMCLKs to allow for ODT signal settling. 
*/ + pMCTData->AgesaDelay(10); +- ByteLane = 0; + if (pass == 1) + { + if (pDCTData->Status[DCT_STATUS_REGISTERED]) +@@ -705,10 +688,17 @@ void procConifg(sMCTStruct *pMCTData,sDCTStruct *pDCTData, u8 dimm, u8 pass) + } + else + { +- Seed_Gross = 0x00; +- Seed_Fine = 0x1A; ++ if (MemClkFreq == 6) { ++ /* DDR-800 */ ++ Seed_Gross = 0x00; ++ Seed_Fine = 0x1a; ++ } else { ++ /* Use settings for DDR-400 (interpolated from BKDG) */ ++ Seed_Gross = 0x00; ++ Seed_Fine = 0x0d; ++ } + } +- while(ByteLane < MAX_BYTE_LANES) ++ for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) + { + /* Program an initialization value to registers F2x[1, 0]9C_x[51:50] and + * F2x[1, 0]9C_x52 to set the gross and fine delay for all the byte lane fields +@@ -720,35 +710,32 @@ void procConifg(sMCTStruct *pMCTData,sDCTStruct *pDCTData, u8 dimm, u8 pass) + */ + pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Gross; + pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Fine; +- ByteLane++; + } +- } else if (pDCTData->Status[DCT_STATUS_REGISTERED]) { /* For Pass 2 */ ++ } else { /* Pass 2 */ + /* From BKDG, Write Leveling Seed Value. */ +- /* TODO: The unbuffered DIMMs are unstable on the code below. So temporarily it is +- * only for registered DIMMs. */ + u32 RegisterDelay, SeedTotal; +- u8 MemClkFreq; +- u16 freq_tab[] = {400, 533, 667, 800}; +- while(ByteLane < MAX_BYTE_LANES) ++ for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) + { +- MemClkFreq = get_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, +- FUN_DCT, DRAM_CONFIG_HIGH, 0, 2); + if (pDCTData->Status[DCT_STATUS_REGISTERED]) + RegisterDelay = 0x20; /* TODO: ((RCW2 & BIT0) == 0) ? 
0x20 : 0x30; */ + else + RegisterDelay = 0; +- SeedTotal = (pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] & 0x1F) | +- pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] << 5; ++ SeedTotal = (pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] & 0x1f) | ++ (pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] << 5); + /* SeedTotalPreScaling = (the total delay value in F2x[1, 0]9C_x[4A:30] from pass 1 of write levelization + training) - RegisterDelay. */ +- /* MemClkFreq: 3: 400MHz; 4: 533MHz; 5: 667MHz; 6: 800MHz */ +- SeedTotal = (u16) (RegisterDelay + ((((u32) SeedTotal - RegisterDelay) * +- freq_tab[MemClkFreq-3]) / 400)); +- Seed_Gross = (SeedTotal & 0x20) != 0 ? 1 : 2; +- Seed_Fine = SeedTotal & 0x1F; ++ SeedTotal = (uint16_t) (RegisterDelay + ((((uint64_t) SeedTotal - RegisterDelay) * ++ freq_tab[MemClkFreq-3] * 100) / (freq_tab[0] * 100))); ++ Seed_Gross = SeedTotal / 32; ++ Seed_Fine = SeedTotal & 0x1f; ++ if (Seed_Gross == 0) ++ Seed_Gross = 0; ++ else if (Seed_Gross & 0x1) ++ Seed_Gross = 1; ++ else ++ Seed_Gross = 2; + pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Gross; + pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Fine; +- ByteLane ++; + } + } + +diff --git a/src/northbridge/amd/amdmct/wrappers/mcti_d.c b/src/northbridge/amd/amdmct/wrappers/mcti_d.c +index ea32893..47260f2 100644 +--- a/src/northbridge/amd/amdmct/wrappers/mcti_d.c ++++ b/src/northbridge/amd/amdmct/wrappers/mcti_d.c +@@ -49,7 +49,7 @@ static const uint16_t ddr3_limits[4] = {800, 666, 533, 400}; + static u16 mctGet_NVbits(u8 index) + { + u16 val = 0; +- int nvram; ++ int nvram = 0; + + switch (index) { + case NV_PACK_TYPE: +@@ -59,6 +59,10 @@ static u16 mctGet_NVbits(u8 index) + val = 1; + #elif CONFIG_CPU_SOCKET_TYPE == 0x13 /* ASB2 */ + val = 4; ++#elif CONFIG_CPU_SOCKET_TYPE == 0x14 /* C32 */ ++ val = 5; ++#elif CONFIG_CPU_SOCKET_TYPE == 0x15 /* G34 */ ++ val = 3; + //#elif SYSTEM_TYPE == MOBILE + // val = 2; + #endif +@@ -297,6 +301,8 @@ static 
void mctGet_MaxLoadFreq(struct DCTStatStruc *pDCTstat) + /* Determine the number of installed DIMMs */ + int ch1_count = 0; + int ch2_count = 0; ++ uint8_t ch1_registered = 0; ++ uint8_t ch2_registered = 0; + int i; + for (i = 0; i < 15; i = i + 2) { + if (pDCTstat->DIMMValid & (1 << i)) +@@ -304,13 +310,19 @@ static void mctGet_MaxLoadFreq(struct DCTStatStruc *pDCTstat) + if (pDCTstat->DIMMValid & (1 << (i + 1))) + ch2_count++; + } ++ for (i = 0; i < MAX_DIMMS_SUPPORTED; i = i + 2) { ++ if (pDCTstat->DimmRegistered[i]) ++ ch1_registered = 1; ++ if (pDCTstat->DimmRegistered[i + 1]) ++ ch2_registered = 1; ++ } + if (IS_ENABLED(CONFIG_DEBUG_RAM_SETUP)) { + printk(BIOS_DEBUG, "mctGet_MaxLoadFreq: Channel 1: %d DIMM(s) detected\n", ch1_count); + printk(BIOS_DEBUG, "mctGet_MaxLoadFreq: Channel 2: %d DIMM(s) detected\n", ch2_count); + } + + /* Set limits if needed */ +- pDCTstat->PresetmaxFreq = mct_MaxLoadFreq(max(ch1_count, ch2_count), pDCTstat->PresetmaxFreq); ++ pDCTstat->PresetmaxFreq = mct_MaxLoadFreq(max(ch1_count, ch2_count), (ch1_registered || ch2_registered), pDCTstat->PresetmaxFreq); + } + + #ifdef UNUSED_CODE +@@ -413,101 +425,6 @@ static void mctHookAfterDramInit(void) + } + + #if (CONFIG_DIMM_SUPPORT & 0x000F)==0x0005 /* AMD_FAM10_DDR3 */ +-static void coreDelay(u32 microseconds) +-{ +- msr_t now; +- msr_t end; +- u32 cycles; +- +- /* delay ~40us +- This seems like a hack to me... +- It would be nice to have a central delay function. */ +- +- cycles = (microseconds * 100) << 3; /* x8 (number of 1.25ns ticks) */ +- +- if (!(rdmsr(HWCR).lo & TSC_FREQ_SEL_MASK)) { +- msr_t pstate_msr = rdmsr(CUR_PSTATE_MSR); +- if (!(rdmsr(0xC0010064+pstate_msr.lo).lo & NB_DID_M_ON)) { +- cycles = cycles <<1; // half freq, double cycles +- } +- } // else should we keep p0 freq at the time of setting TSC_FREQ_SEL_MASK somewhere and check it here ? 
+- +- now = rdmsr(TSC_MSR); +- // avoid overflow when called near 2^32 ticks ~ 5.3 s boundaries +- if (0xffffffff - cycles >= now.lo ) { +- end.hi = now.hi; +- end.lo = now.lo + cycles; +- } else { +- end.hi = now.hi +1; // +- end.lo = cycles - (1+(0xffffffff - now.lo)); +- } +- do { +- now = rdmsr(TSC_MSR); +- } while ((now.hi < end.hi) || ((now.hi == end.hi) && (now.lo < end.lo))); +-} +- +-/* Erratum 350 */ +-static void vErrata350(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat) +-{ +- u8 u8Channel; +- u8 u8Receiver; +- u32 u32Addr; +- u8 u8Valid; +- u32 u32DctDev; +- +- // 1. dummy read for each installed DIMM */ +- for (u8Channel = 0; u8Channel < 2; u8Channel++) { +- // This will be 0 for vaild DIMMS, eles 8 +- u8Receiver = mct_InitReceiver_D(pDCTstat, u8Channel); +- +- for (; u8Receiver < 8; u8Receiver += 2) { +- u32Addr = mct_GetRcvrSysAddr_D(pMCTstat, pDCTstat, u8Channel, u8Receiver, &u8Valid); +- +- if(!u8Valid) { /* Address not supported on current CS */ +- print_t("vErrata350: Address not supported on current CS\n"); +- continue; +- } +- print_t("vErrata350: dummy read \n"); +- read32_fs(u32Addr); +- } +- } +- +- print_t("vErrata350: step 2a\n"); +- +- /* 2. Write 0000_8000h to register F2x[1, 0]9C_xD080F0C. */ +- u32DctDev = pDCTstat->dev_dct; +- Set_NB32_index_wait(u32DctDev, 0x098, 0xD080F0C, 0x00008000); +- /* ^--- value +- ^---F2x[1, 0]9C_x0D080F0C, No description in BKDG. +- ^----F2x[1, 0]98 DRAM Controller Additional Data Offset Register */ +- +- if(!pDCTstat->GangedMode) { +- print_t("vErrata350: step 2b\n"); +- Set_NB32_index_wait(u32DctDev, 0x198, 0xD080F0C, 0x00008000); +- /* ^--- value +- ^---F2x[1, 0]9C_x0D080F0C, No description in BKDG +- ^----F2x[1, 0]98 DRAM Controller Additional Data Offset Register */ +- } +- +- print_t("vErrata350: step 3\n"); +- /* 3. Wait at least 300 nanoseconds. */ +- coreDelay(1); +- +- print_t("vErrata350: step 4\n"); +- /* 4. Write 0000_0000h to register F2x[1, 0]9C_xD080F0C. 
*/ +- Set_NB32_index_wait(u32DctDev, 0x098, 0xD080F0C, 0x00000000); +- +- if(!pDCTstat->GangedMode) { +- print_t("vErrata350: step 4b\n"); +- Set_NB32_index_wait(u32DctDev, 0x198, 0xD080F0C, 0x00000000); +- } +- +- print_t("vErrata350: step 5\n"); +- /* 5. Wait at least 2 microseconds. */ +- coreDelay(2); +- +-} +- + static void vErratum372(struct DCTStatStruc *pDCTstat) + { + msr_t msr = rdmsr(NB_CFG_MSR); +@@ -546,8 +463,7 @@ static void mctHookBeforeAnyTraining(struct MCTStatStruc *pMCTstat, struct DCTSt + { + #if (CONFIG_DIMM_SUPPORT & 0x000F)==0x0005 /* AMD_FAM10_DDR3 */ + /* FIXME : as of 25.6.2010 errata 350 and 372 should apply to ((RB|BL|DA)-C[23])|(HY-D[01])|(PH-E0) but I don't find constants for all of them */ +- if (pDCTstatA->LogicalCPUID & AMD_DRBH_Cx) { +- vErrata350(pMCTstat, pDCTstatA); ++ if (pDCTstatA->LogicalCPUID & (AMD_DRBH_Cx | AMD_DR_Dx)) { + vErratum372(pDCTstatA); + vErratum414(pDCTstatA); + } +-- +1.7.9.5 + |