diff options
author | Francis Rowe <info@gluglug.org.uk> | 2015-10-19 00:12:53 +0100 |
---|---|---|
committer | Francis Rowe <info@gluglug.org.uk> | 2015-10-19 02:32:36 +0100 |
commit | 0622df6194dbb1b2120743c0fd1cc5e72c380128 (patch) | |
tree | 4c858b8c5667fe001a9907ae0578b4ec28a8f513 /resources/libreboot/patch/kgpe-d16/0086-northbridge-amd-amdmct-mct_ddr3-Add-registered-and-x.patch | |
parent | 5999dba5f71f1c05040a551d2420ab8c7f3a9da4 (diff) | |
download | librebootfr-0622df6194dbb1b2120743c0fd1cc5e72c380128.tar.gz librebootfr-0622df6194dbb1b2120743c0fd1cc5e72c380128.zip |
KGPE-D16: update patch set (also update coreboot and vboot)
Also contains other fixes from coreboot, like:
* 551cff0 Derive lvds_dual_channel from EDID timings.
^ makes single/dual channel LVDS selection on GM45 automatic
* 26fc544 lenovo/t60: Enable native intel gfx init.
^ was being maintained in libreboot, now upstreamed so not needed
Framebuffer mode was disabled for the KGPE-D16, because only
text-mode works at the moment.
Diffstat (limited to 'resources/libreboot/patch/kgpe-d16/0086-northbridge-amd-amdmct-mct_ddr3-Add-registered-and-x.patch')
-rw-r--r-- | resources/libreboot/patch/kgpe-d16/0086-northbridge-amd-amdmct-mct_ddr3-Add-registered-and-x.patch | 1887 |
1 files changed, 1887 insertions, 0 deletions
diff --git a/resources/libreboot/patch/kgpe-d16/0086-northbridge-amd-amdmct-mct_ddr3-Add-registered-and-x.patch b/resources/libreboot/patch/kgpe-d16/0086-northbridge-amd-amdmct-mct_ddr3-Add-registered-and-x.patch new file mode 100644 index 00000000..37cda08c --- /dev/null +++ b/resources/libreboot/patch/kgpe-d16/0086-northbridge-amd-amdmct-mct_ddr3-Add-registered-and-x.patch @@ -0,0 +1,1887 @@ +From 95e15fd07cd76057737c0b52a4ece6f73501ea1e Mon Sep 17 00:00:00 2001 +From: Timothy Pearson <tpearson@raptorengineeringinc.com> +Date: Tue, 28 Jul 2015 15:16:46 -0500 +Subject: [PATCH 086/139] northbridge/amd/amdmct/mct_ddr3: Add registered and + x4 DIMM support to Fam15h + +Change-Id: I9ee0bb7346aa35f564fe535cdd337ec7f6148f2b +Signed-off-by: Timothy Pearson <tpearson@raptorengineeringinc.com> +--- + src/northbridge/amd/amdmct/mct_ddr3/mct_d.c | 186 +++++++----- + src/northbridge/amd/amdmct/mct_ddr3/mct_d.h | 2 +- + src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c | 4 + + src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c | 17 +- + src/northbridge/amd/amdmct/mct_ddr3/mctrci.c | 191 ++++++++---- + src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c | 42 ++- + src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c | 253 +++++++++------- + src/northbridge/amd/amdmct/mct_ddr3/mctwl.c | 16 +- + src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c | 400 +++++++++++++++---------- + src/northbridge/amd/amdmct/mct_ddr3/mwlc_d.h | 13 +- + 10 files changed, 698 insertions(+), 426 deletions(-) + +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c +index 8102f2a..5a57dc0 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c +@@ -166,7 +166,7 @@ static void mct_EnDllShutdownSR(struct MCTStatStruc *pMCTstat, + static void ChangeMemClk(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat); + void SetTargetFreq(struct MCTStatStruc *pMCTstat, +- struct DCTStatStruc *pDCTstat); ++ struct DCTStatStruc *pDCTstatA, uint8_t Node); + + static u32 mct_MR1Odt_RDimm(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct, u32 MrsChipSel); +@@ -1404,6 +1404,10 @@ static void precise_memclk_delay_fam15(struct MCTStatStruc *pMCTstat, struct DCT + + memclk_freq = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x94) & 0x1f; + ++ if (fam15h_freq_tab[memclk_freq] == 0) { ++ printk(BIOS_DEBUG, "ERROR: precise_memclk_delay_fam15 for DCT %d (delay %d clocks) failed to obtain valid memory frequency!" ++ " (pDCTstat: %p pDCTstat->dev_dct: %08x memclk_freq: %02x)\n", dct, clocks, pDCTstat, pDCTstat->dev_dct, memclk_freq); ++ } + delay_ns = (((uint64_t)clocks * 1000) / fam15h_freq_tab[memclk_freq]); + precise_ndelay_fam15(pMCTstat, delay_ns); + } +@@ -2320,7 +2324,7 @@ static void DQSTiming_D(struct MCTStatStruc *pMCTstat, + nv_DQSTrainCTL = !allow_config_restore; + + mct_BeforeDQSTrain_D(pMCTstat, pDCTstatA); +- phyAssistedMemFnceTraining(pMCTstat, pDCTstatA); ++ phyAssistedMemFnceTraining(pMCTstat, pDCTstatA, -1); + + if (is_fam15h()) { + uint8_t Node; +@@ -3357,7 +3361,7 @@ static void SPD2ndTiming(struct MCTStatStruc *pMCTstat, + } + + static u8 AutoCycTiming_D(struct MCTStatStruc *pMCTstat, +- struct DCTStatStruc *pDCTstat, u8 dct) ++ struct DCTStatStruc *pDCTstat, uint8_t dct) + { + /* Initialize DCT Timing registers as per DIMM SPD. + * For primary timing (T, CL) use best case T value. +@@ -3461,7 +3465,7 @@ static void GetPresetmaxF_D(struct MCTStatStruc *pMCTstat, + } + + static void SPDGetTCL_D(struct MCTStatStruc *pMCTstat, +- struct DCTStatStruc *pDCTstat, u8 dct) ++ struct DCTStatStruc *pDCTstat, uint8_t dct) + { + /* Find the best T and CL primary timing parameter pair, per Mfg., + * for the given set of DIMMs, and store into DCTStatStruc +@@ -3740,10 +3744,15 @@ static u8 AutoConfig_D(struct MCTStatStruc *pMCTstat, + dword++; + } + +- if (Status & (1 << SB_Registered)) +- DramConfigLo |= 1 << ParEn; /* Registered DIMMs */ +- else +- DramConfigLo |= 1 << UnBuffDimm; /* Unbuffered DIMMs */ ++ if (Status & (1 << SB_Registered)) { ++ /* Registered DIMMs */ ++ if (!is_fam15h()) { ++ DramConfigLo |= 1 << ParEn; ++ } ++ } else { ++ /* Unbuffered DIMMs */ ++ DramConfigLo |= 1 << UnBuffDimm; ++ } + + if (mctGet_NVbits(NV_ECC_CAP)) + if (Status & (1 << SB_ECCDIMMs)) +@@ -3761,10 +3770,11 @@ static u8 AutoConfig_D(struct MCTStatStruc *pMCTstat, + DramConfigHi |= dword - offset; /* get MemClk encoding */ + DramConfigHi |= 1 << MemClkFreqVal; + +- if (Status & (1 << SB_Registered)) +- if ((pDCTstat->Dimmx4Present != 0) && (pDCTstat->Dimmx8Present != 0)) +- /* set only if x8 Registered DIMMs in System*/ +- DramConfigHi |= 1 << RDqsEn; ++ if (!is_fam15h()) ++ if (Status & (1 << SB_Registered)) ++ if ((pDCTstat->Dimmx4Present != 0) && (pDCTstat->Dimmx8Present != 0)) ++ /* set only if x8 Registered DIMMs in System*/ ++ DramConfigHi |= 1 << RDqsEn; + + if (pDCTstat->LogicalCPUID & AMD_FAM15_ALL) { + DramConfigLo |= 1 << 25; /* PendRefPaybackS3En = 1 */ +@@ -3776,14 +3786,16 @@ static u8 AutoConfig_D(struct MCTStatStruc *pMCTstat, + DramConfigHi |= 1 << 16; + } + +- /* Control Bank Swizzle */ +- if (0) /* call back not needed mctBankSwizzleControl_D()) */ +- DramConfigHi &= ~(1 << BankSwizzleMode); +- else +- DramConfigHi |= 1 << BankSwizzleMode; /* recommended setting (default) */ ++ if (!is_fam15h()) { ++ /* Control Bank Swizzle */ ++ if (0) /* call back not needed mctBankSwizzleControl_D()) */ ++ DramConfigHi &= ~(1 << BankSwizzleMode); ++ else ++ DramConfigHi |= 1 << BankSwizzleMode; /* recommended setting (default) */ ++ } + + /* Check for Quadrank DIMM presence */ +- if ( pDCTstat->DimmQRPresent != 0) { ++ if (pDCTstat->DimmQRPresent != 0) { + byte = mctGet_NVbits(NV_4RANKType); + if (byte == 2) + DramConfigHi |= 1 << 17; /* S4 (4-Rank SO-DIMMs) */ +@@ -4588,8 +4600,9 @@ static u8 mct_setMode(struct MCTStatStruc *pMCTstat, + Set_NB32(pDCTstat->dev_dct, reg, val); + } + if (byte) /* NV_Unganged */ +- pDCTstat->ErrStatus &= ~(1 << SB_DimmMismatchO); /* Clear so that there is no DIMM missmatch error */ ++ pDCTstat->ErrStatus &= ~(1 << SB_DimmMismatchO); /* Clear so that there is no DIMM mismatch error */ + } ++ + return pDCTstat->ErrCode; + } + +@@ -4650,6 +4663,8 @@ void Set_NB32_index_wait(u32 dev, u32 index_reg, u32 index, u32 data) + static u8 mct_BeforePlatformSpec(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct) + { ++ printk(BIOS_DEBUG, "%s: Start\n", __func__); ++ + /* mct_checkForCxDxSupport_D */ + if (pDCTstat->LogicalCPUID & AMD_DR_GT_Bx) { + /* Family 10h Errata 322: Address and Command Fine Delay Values May Be Incorrect */ +@@ -4664,6 +4679,9 @@ static u8 mct_BeforePlatformSpec(struct MCTStatStruc *pMCTstat, + else + Set_NB32_index_wait_DCT(pDCTstat->dev_dct, dct, 0x98, 0x0D02E001, 0x90); + } ++ ++ printk(BIOS_DEBUG, "%s: Done\n", __func__); ++ + return pDCTstat->ErrCode; + } + +@@ -4674,6 +4692,8 @@ static u8 mct_PlatformSpec(struct MCTStatStruc *pMCTstat, + * and program them into DCT. + */ + ++ printk(BIOS_DEBUG, "%s: Start\n", __func__); ++ + u32 dev = pDCTstat->dev_dct; + u32 index_reg; + u8 i, i_start, i_end; +@@ -4694,6 +4714,8 @@ static u8 mct_PlatformSpec(struct MCTStatStruc *pMCTstat, + printk(BIOS_SPEW, "Programmed DCT %d timing/termination pattern %08x %08x\n", dct, pDCTstat->CH_ADDR_TMG[i], pDCTstat->CH_ODC_CTL[i]); + } + ++ printk(BIOS_DEBUG, "%s: Done\n", __func__); ++ + return pDCTstat->ErrCode; + } + +@@ -4705,7 +4727,8 @@ static void mct_SyncDCTsReady(struct DCTStatStruc *pDCTstat) + if (pDCTstat->NodePresent) { + dev = pDCTstat->dev_dct; + +- if ((pDCTstat->DIMMValidDCT[0] ) || (pDCTstat->DIMMValidDCT[1])) { /* This Node has dram */ ++ if ((pDCTstat->DIMMValidDCT[0]) || (pDCTstat->DIMMValidDCT[1])) { ++ /* This Node has DRAM */ + do { + val = Get_NB32(dev, 0x110); + } while (!(val & (1 << DramEnabled))); +@@ -5653,57 +5676,56 @@ static void InitDDRPhy(struct MCTStatStruc *pMCTstat, + /* Fam15h BKDG v3.14 section 2.10.5.3 + * The remainder of the Phy Initialization algorithm picks up in phyAssistedMemFnceTraining + */ +- for (dct = 0; dct < 2; dct++) { +- Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000b, 0x80000000); +- Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fe013, 0x00000118); +- +- /* Program desired VDDIO level */ +- if (ddr_voltage_index & 0x4) { +- /* 1.25V */ +- amd_voltage_level_index = 0x2; +- } else if (ddr_voltage_index & 0x2) { +- /* 1.35V */ +- amd_voltage_level_index = 0x1; +- } else if (ddr_voltage_index & 0x1) { +- /* 1.50V */ +- amd_voltage_level_index = 0x0; +- } +- +- /* D18F2x9C_x0D0F_0[F,8:0]1F_dct[1:0][RxVioLvl] */ +- for (index = 0; index < 0x9; index++) { +- dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f001f | (index << 8)); +- dword &= ~(0x3 << 3); +- dword |= (amd_voltage_level_index << 3); +- Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f001f | (index << 8), dword); +- } +- +- /* D18F2x9C_x0D0F_[C,8,2][2:0]1F_dct[1:0][RxVioLvl] */ +- for (index = 0; index < 0x3; index++) { +- dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f201f | (index << 8)); +- dword &= ~(0x3 << 3); +- dword |= (amd_voltage_level_index << 3); +- Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f201f | (index << 8), dword); +- } +- for (index = 0; index < 0x2; index++) { +- dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f801f | (index << 8)); +- dword &= ~(0x3 << 3); +- dword |= (amd_voltage_level_index << 3); +- Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f801f | (index << 8), dword); +- } +- for (index = 0; index < 0x1; index++) { +- dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fc01f | (index << 8)); +- dword &= ~(0x3 << 3); +- dword |= (amd_voltage_level_index << 3); +- Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fc01f | (index << 8), dword); +- } ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000b, 0x80000000); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fe013, 0x00000118); + +- /* D18F2x9C_x0D0F_4009_dct[1:0][CmpVioLvl, ComparatorAdjust] */ +- dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f4009); +- dword &= ~(0x0000c00c); +- dword |= (amd_voltage_level_index << 14); +- dword |= (amd_voltage_level_index << 2); +- Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f4009, dword); +- } ++ /* Program desired VDDIO level */ ++ if (ddr_voltage_index & 0x4) { ++ /* 1.25V */ ++ amd_voltage_level_index = 0x2; ++ } else if (ddr_voltage_index & 0x2) { ++ /* 1.35V */ ++ amd_voltage_level_index = 0x1; ++ } else if (ddr_voltage_index & 0x1) { ++ /* 1.50V */ ++ amd_voltage_level_index = 0x0; ++ } ++ ++ /* D18F2x9C_x0D0F_0[F,8:0]1F_dct[1:0][RxVioLvl] */ ++ for (index = 0; index < 0x9; index++) { ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f001f | (index << 8)); ++ dword &= ~(0x3 << 3); ++ dword |= (amd_voltage_level_index << 3); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f001f | (index << 8), dword); ++ } ++ ++ /* D18F2x9C_x0D0F_[C,8,2][2:0]1F_dct[1:0][RxVioLvl] */ ++ for (index = 0; index < 0x3; index++) { ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f201f | (index << 8)); ++ dword &= ~(0x3 << 3); ++ dword |= (amd_voltage_level_index << 3); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f201f | (index << 8), dword); ++ } ++ for (index = 0; index < 0x2; index++) { ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f801f | (index << 8)); ++ dword &= ~(0x3 << 3); ++ dword |= (amd_voltage_level_index << 3); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f801f | (index << 8), dword); ++ } ++ for (index = 0; index < 0x1; index++) { ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fc01f | (index << 8)); ++ dword &= ~(0x3 << 3); ++ dword |= (amd_voltage_level_index << 3); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fc01f | (index << 8), dword); ++ } ++ ++ /* D18F2x9C_x0D0F_4009_dct[1:0][CmpVioLvl, ComparatorAdjust] */ ++ /* NOTE: CmpVioLvl and ComparatorAdjust only take effect when set on DCT 0 */ ++ dword = Get_NB32_index_wait_DCT(dev, 0, index_reg, 0x0d0f4009); ++ dword &= ~(0x0000c00c); ++ dword |= (amd_voltage_level_index << 14); ++ dword |= (amd_voltage_level_index << 2); ++ Set_NB32_index_wait_DCT(dev, 0, index_reg, 0x0d0f4009, dword); + + printk(BIOS_DEBUG, "%s: Done\n", __func__); + } +@@ -5719,18 +5741,24 @@ static void InitPhyCompensation(struct MCTStatStruc *pMCTstat, + uint32_t dword; + const u8 *p; + +- printk(BIOS_DEBUG, "%s: Start\n", __func__); ++ printk(BIOS_DEBUG, "%s: DCT %d: Start\n", __func__, dct); + + if (is_fam15h()) { + /* Algorithm detailed in the Fam15h BKDG Rev. 3.14 section 2.10.5.3.4 */ + uint32_t tx_pre; + uint32_t drive_strength; + +- /* Program D18F2x9C_x0D0F_E003_dct[1:0][DisAutoComp, DisablePredriverCal] */ ++ /* Program D18F2x9C_x0D0F_E003_dct[1:0][DisAutoComp] */ + dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fe003); +- dword |= (0x3 << 13); ++ dword |= (0x1 << 14); + Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fe003, dword); + ++ /* Program D18F2x9C_x0D0F_E003_dct[1:0][DisablePredriverCal] */ ++ /* NOTE: DisablePredriverCal only takes effect when set on DCT 0 */ ++ dword = Get_NB32_index_wait_DCT(dev, 0, index_reg, 0x0d0fe003); ++ dword |= (0x1 << 13); ++ Set_NB32_index_wait_DCT(dev, 0, index_reg, 0x0d0fe003, dword); ++ + /* Determine TxPreP/TxPreN for data lanes (Stage 1) */ + dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000000); + drive_strength = (dword >> 20) & 0x7; /* DqsDrvStren */ +@@ -5876,12 +5904,14 @@ static void InitPhyCompensation(struct MCTStatStruc *pMCTstat, + Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0a, dword); + } + +- printk(BIOS_DEBUG, "%s: Done\n", __func__); ++ printk(BIOS_DEBUG, "%s: DCT %d: Done\n", __func__, dct); + } + + static void mct_EarlyArbEn_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct) + { ++ printk(BIOS_DEBUG, "%s: Start\n", __func__); ++ + if (!is_fam15h()) { + u32 reg; + u32 val; +@@ -5903,6 +5933,8 @@ static void mct_EarlyArbEn_D(struct MCTStatStruc *pMCTstat, + + Set_NB32_DCT(dev, dct, reg, val); + } ++ ++ printk(BIOS_DEBUG, "%s: Done\n", __func__); + } + + static u8 CheckNBCOFEarlyArbEn(struct MCTStatStruc *pMCTstat, +@@ -6546,6 +6578,8 @@ void mct_SetDramConfigHi_D(struct MCTStatStruc *pMCTstat, + + uint32_t dword; + ++ printk(BIOS_DEBUG, "%s: Start\n", __func__); ++ + if (is_fam15h()) { + /* Initial setup for frequency change + * 9C_x0000_0004 must be configured before MemClkFreqVal is set +@@ -6578,6 +6612,8 @@ void mct_SetDramConfigHi_D(struct MCTStatStruc *pMCTstat, + mct_Wait(100); + } + ++ printk(BIOS_DEBUG, "mct_SetDramConfigHi_D: DramConfigHi: %08x\n", DramConfigHi); ++ + /* Program the DRAM Configuration High register */ + Set_NB32_DCT(dev, dct, 0x94, DramConfigHi); + +@@ -6593,6 +6629,8 @@ void mct_SetDramConfigHi_D(struct MCTStatStruc *pMCTstat, + dword |= 0x0000000f; + Set_NB32_index_wait_DCT(pDCTstat->dev_dct, dct, index_reg, 0x0d0fe006, dword); + } ++ ++ printk(BIOS_DEBUG, "%s: Done\n", __func__); + } + + static void mct_BeforeDQSTrain_D(struct MCTStatStruc *pMCTstat, +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h +index e327d38..486b16c 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h +@@ -1014,7 +1014,7 @@ void InterleaveNodes_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTs + void InterleaveChannels_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA); + void mct_BeforeDQSTrain_Samp_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat); + +-void phyAssistedMemFnceTraining(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA); ++void phyAssistedMemFnceTraining(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA, int16_t Node); + u8 mct_SaveRcvEnDly_D_1Pass(struct DCTStatStruc *pDCTstat, u8 pass); + u8 mct_InitReceiver_D(struct DCTStatStruc *pDCTstat, u8 dct); + void mct_Wait(u32 cycles); +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c +index 36e9858..c70fa6d 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c +@@ -1588,6 +1588,7 @@ static void TrainDQSReceiverEnCyc_D_Fam15(struct MCTStatStruc *pMCTstat, + + for (dct = 0; dct < 2; dct++) { + /* Program D18F2x9C_x0D0F_E003_dct[1:0][DisAutoComp, DisablePredriverCal] */ ++ /* NOTE: DisablePredriverCal only takes effect when set on DCT 0 */ + dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fe003); + dword &= ~(0x3 << 13); + dword |= (0x1 << 13); +@@ -1627,6 +1628,9 @@ static void TrainDQSReceiverEnCyc_D_Fam15(struct MCTStatStruc *pMCTstat, + rx_en_offset = (initial_phy_phase_delay[lane] + 0x10) % 0x40; + + /* 2.10.5.8.3 (4) */ ++#if DQS_TRAIN_DEBUG > 0 ++ printk(BIOS_DEBUG, "TrainDQSReceiverEnCyc_D_Fam15 Receiver %d lane %d initial phy delay %04x: iterating from %04x to %04x\n", Receiver, lane, initial_phy_phase_delay[lane], rx_en_offset, 0x3ff); ++#endif + for (current_phy_phase_delay[lane] = rx_en_offset; current_phy_phase_delay[lane] < 0x3ff; current_phy_phase_delay[lane] += ren_step) { + /* 2.10.5.8.3 (4 A) */ + write_dqs_receiver_enable_control_registers(current_phy_phase_delay, dev, dct, dimm, index_reg); +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c b/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c +index 539cb0d..1b81d15 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c +@@ -21,7 +21,7 @@ + static uint8_t AgesaHwWlPhase1(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct, u8 dimm, u8 pass); + static uint8_t AgesaHwWlPhase2(struct MCTStatStruc *pMCTstat, +- struct DCTStatStruc *pDCTstat, u8 dct, u8 dimm, u8 pass); ++ struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t dimm, uint8_t pass); + static uint8_t AgesaHwWlPhase3(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct, u8 dimm, u8 pass); + static void EnableZQcalibration(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat); +@@ -133,7 +133,7 @@ static uint8_t PhyWLPass1(struct MCTStatStruc *pMCTstat, + } + + static uint8_t PhyWLPass2(struct MCTStatStruc *pMCTstat, +- struct DCTStatStruc *pDCTstat, u8 dct) ++ struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t final) + { + u8 dimm; + u16 DIMMValid; +@@ -187,12 +187,15 @@ static uint16_t fam15h_next_highest_memclk_freq(uint16_t memclk_freq) + * Algorithm detailed in the Fam10h BKDG Rev. 3.62 section 2.8.9.9.1 + */ + static void WriteLevelization_HW(struct MCTStatStruc *pMCTstat, +- struct DCTStatStruc *pDCTstat, uint8_t Pass) ++ struct DCTStatStruc *pDCTstatA, uint8_t Node, uint8_t Pass) + { + uint8_t status; + uint8_t timeout; + uint16_t final_target_freq; + ++ struct DCTStatStruc *pDCTstat; ++ pDCTstat = pDCTstatA + Node; ++ + pDCTstat->C_MCTPtr = &(pDCTstat->s_C_MCTPtr); + pDCTstat->C_DCTPtr[0] = &(pDCTstat->s_C_DCTPtr[0]); + pDCTstat->C_DCTPtr[1] = &(pDCTstat->s_C_DCTPtr[1]); +@@ -240,13 +243,13 @@ static void WriteLevelization_HW(struct MCTStatStruc *pMCTstat, + pDCTstat->TargetFreq = fam15h_next_highest_memclk_freq(pDCTstat->Speed); + else + pDCTstat->TargetFreq = final_target_freq; +- SetTargetFreq(pMCTstat, pDCTstat); ++ SetTargetFreq(pMCTstat, pDCTstatA, Node); + timeout = 0; + do { + status = 0; + timeout++; +- status |= PhyWLPass2(pMCTstat, pDCTstat, 0); +- status |= PhyWLPass2(pMCTstat, pDCTstat, 1); ++ status |= PhyWLPass2(pMCTstat, pDCTstat, 0, (pDCTstat->TargetFreq == final_target_freq)); ++ status |= PhyWLPass2(pMCTstat, pDCTstat, 1, (pDCTstat->TargetFreq == final_target_freq)); + if (status) + printk(BIOS_INFO, + "%s: Retrying write levelling due to invalid value(s) detected in last phase\n", +@@ -290,7 +293,7 @@ void mct_WriteLevelization_HW(struct MCTStatStruc *pMCTstat, + if (pDCTstat->NodePresent) { + mctSMBhub_Init(Node); + Clear_OnDimmMirror(pMCTstat, pDCTstat); +- WriteLevelization_HW(pMCTstat, pDCTstat, Pass); ++ WriteLevelization_HW(pMCTstat, pDCTstatA, Node, Pass); + Restore_OnDimmMirror(pMCTstat, pDCTstat); + } + } +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctrci.c b/src/northbridge/amd/amdmct/mct_ddr3/mctrci.c +index 9617f84..624a543 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mctrci.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctrci.c +@@ -18,6 +18,78 @@ + * Foundation, Inc. + */ + ++static uint8_t fam15h_rdimm_rc2_control_code(struct DCTStatStruc *pDCTstat, uint8_t dct) ++{ ++ uint8_t MaxDimmsInstallable = mctGet_NVbits(NV_MAX_DIMMS_PER_CH); ++ ++ uint8_t package_type; ++ uint8_t control_code = 0; ++ ++ package_type = mctGet_NVbits(NV_PACK_TYPE); ++ uint16_t MemClkFreq = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x94) & 0x1f; ++ ++ /* Obtain number of DIMMs on channel */ ++ uint8_t dimm_count = pDCTstat->MAdimms[dct]; ++ ++ /* FIXME ++ * Assume there is only one register on the RDIMM for now ++ */ ++ uint8_t num_registers = 1; ++ ++ if (package_type == PT_GR) { ++ /* Socket G34 */ ++ /* Fam15h BKDG Rev. 3.14 section 2.10.5.7.1.2.1 Table 85 */ ++ if (MaxDimmsInstallable == 1) { ++ if ((MemClkFreq == 0x4) || (MemClkFreq == 0x6)) { ++ /* DDR3-667 - DDR3-800 */ ++ control_code = 0x1; ++ } else if ((MemClkFreq == 0xa) || (MemClkFreq == 0xe)) { ++ /* DDR3-1066 - DDR3-1333 */ ++ if (num_registers == 1) { ++ control_code = 0x0; ++ } else { ++ control_code = 0x1; ++ } ++ } else if ((MemClkFreq == 0x12) || (MemClkFreq == 0x16)) { ++ /* DDR3-1600 - DDR3-1866 */ ++ control_code = 0x0; ++ } ++ } else if (MaxDimmsInstallable == 2) { ++ if (dimm_count == 1) { ++ /* 1 DIMM detected */ ++ if ((MemClkFreq == 0x4) || (MemClkFreq == 0x6)) { ++ /* DDR3-667 - DDR3-800 */ ++ control_code = 0x1; ++ } else if ((MemClkFreq >= 0xa) && (MemClkFreq <= 0x12)) { ++ /* DDR3-1066 - DDR3-1600 */ ++ if (num_registers == 1) { ++ control_code = 0x0; ++ } else { ++ control_code = 0x1; ++ } ++ } ++ } else if (dimm_count == 2) { ++ /* 2 DIMMs detected */ ++ if (num_registers == 1) { ++ control_code = 0x1; ++ } else { ++ control_code = 0x8; ++ } ++ } ++ } else if (MaxDimmsInstallable == 3) { ++ /* TODO ++ * 3 DIMM/channel support unimplemented ++ */ ++ } ++ } else { ++ /* TODO ++ * Other socket support unimplemented ++ */ ++ } ++ ++ return control_code; ++} ++ + static uint16_t memclk_to_freq(uint16_t memclk) { + uint16_t fam10h_freq_tab[] = {0, 0, 0, 400, 533, 667, 800}; + uint16_t fam15h_freq_tab[] = {0, 0, 0, 0, 333, 0, 400, 0, 0, 0, 533, 0, 0, 0, 667, 0, 0, 0, 800, 0, 0, 0, 933}; +@@ -37,36 +109,46 @@ static uint16_t memclk_to_freq(uint16_t memclk) { + return mem_freq; + } + ++static uint8_t rc_word_chip_select_lower_bit(void) { ++ if (is_fam15h()) { ++ return 21; ++ } else { ++ return 20; ++ } ++} ++ ++static uint32_t rc_word_address_to_ctl_bits(uint32_t address) { ++ if (is_fam15h()) { ++ return (((address >> 3) & 0x1) << 2) << 18 | (address & 0x7); ++ } else { ++ return (((address >> 3) & 0x1) << 2) << 16 | (address & 0x7); ++ } ++} ++ + static uint32_t rc_word_value_to_ctl_bits(uint32_t value) { +- return ((value >> 2) & 3) << 16 | ((value & 3) << 3); ++ if (is_fam15h()) { ++ return ((value >> 2) & 0x3) << 18 | ((value & 0x3) << 3); ++ } else { ++ return ((value >> 2) & 0x3) << 16 | ((value & 0x3) << 3); ++ } + } + + static u32 mct_ControlRC(struct MCTStatStruc *pMCTstat, +- struct DCTStatStruc *pDCTstat, u32 MrsChipSel, u32 CtrlWordNum) ++ struct DCTStatStruc *pDCTstat, uint8_t dct, u32 MrsChipSel, u32 CtrlWordNum) + { + u8 Dimms, DimmNum; + u32 val; +- u32 dct = 0; + uint8_t ddr_voltage_index; + uint16_t mem_freq; + uint8_t package_type = mctGet_NVbits(NV_PACK_TYPE); + uint8_t MaxDimmsInstallable = mctGet_NVbits(NV_MAX_DIMMS_PER_CH); + +- DimmNum = (MrsChipSel >> 20) & 0xFE; ++ DimmNum = (MrsChipSel >> rc_word_chip_select_lower_bit()) & 0xfe; + +- /* assume dct=0; */ +- /* if (dct == 1) */ +- /* DimmNum ++; */ +- /* cl +=8; */ ++ if (dct == 1) ++ DimmNum++; + + mem_freq = memclk_to_freq(pDCTstat->DIMMAutoSpeed); +- +- if (pDCTstat->CSPresent_DCT[0] > 0) { +- dct = 0; +- } else if (pDCTstat->CSPresent_DCT[1] > 0 ) { +- dct = 1; +- DimmNum++; +- } + Dimms = pDCTstat->MAdimms[dct]; + + ddr_voltage_index = dct_ddr_voltage_index(pDCTstat, dct); +@@ -76,21 +158,25 @@ static u32 mct_ControlRC(struct MCTStatStruc *pMCTstat, + val = 0x2; + else if (CtrlWordNum == 1) { + if (!((pDCTstat->DimmDRPresent | pDCTstat->DimmQRPresent) & (1 << DimmNum))) +- val = 0xC; /* if single rank, set DBA1 and DBA0 */ ++ val = 0xc; /* if single rank, set DBA1 and DBA0 */ + } else if (CtrlWordNum == 2) { +- if (package_type == PT_GR) { +- /* Socket G34 */ +- if (MaxDimmsInstallable == 2) { +- if (Dimms > 1) +- val = 0x4; ++ if (is_fam15h()) { ++ val = fam15h_rdimm_rc2_control_code(pDCTstat, dct); ++ } else { ++ if (package_type == PT_GR) { ++ /* Socket G34 */ ++ if (MaxDimmsInstallable == 2) { ++ if (Dimms > 1) ++ val = 0x4; ++ } + } + } + } else if (CtrlWordNum == 3) { +- val = (pDCTstat->CtrlWrd3 >> (DimmNum << 2)) & 0xFF; ++ val = (pDCTstat->CtrlWrd3 >> (DimmNum << 2)) & 0xff; + } else if (CtrlWordNum == 4) { +- val = (pDCTstat->CtrlWrd4 >> (DimmNum << 2)) & 0xFF; ++ val = (pDCTstat->CtrlWrd4 >> (DimmNum << 2)) & 0xff; + } else if (CtrlWordNum == 5) { +- val = (pDCTstat->CtrlWrd5 >> (DimmNum << 2)) & 0xFF; ++ val = (pDCTstat->CtrlWrd5 >> (DimmNum << 2)) & 0xff; + } else if (CtrlWordNum == 8) { + if (package_type == PT_GR) { + /* Socket G34 */ +@@ -99,7 +185,7 @@ static u32 mct_ControlRC(struct MCTStatStruc *pMCTstat, + } + } + } else if (CtrlWordNum == 9) { +- val = 0xD; /* DBA1, DBA0, DA3 = 0 */ ++ val = 0xd; /* DBA1, DBA0, DA3 = 0 */ + } else if (CtrlWordNum == 10) { + val = 0x0; /* Lowest operating frequency */ + } else if (CtrlWordNum == 11) { +@@ -114,43 +200,30 @@ static u32 mct_ControlRC(struct MCTStatStruc *pMCTstat, + } + val &= 0xf; + +- printk(BIOS_SPEW, "Preparing to send DIMM RC%d: %02x\n", CtrlWordNum, val); ++ printk(BIOS_SPEW, "Preparing to send DCT %d DIMM RC%d: %02x\n", dct, CtrlWordNum, val); + + val = MrsChipSel | rc_word_value_to_ctl_bits(val); +- +- /* transfer Control word number to address [BA2,A2,A1,A0] */ +- if (CtrlWordNum > 7) { +- val |= 1 << 18; +- CtrlWordNum &= 7; +- } +- val |= CtrlWordNum; ++ val |= rc_word_address_to_ctl_bits(CtrlWordNum); + + return val; + } + + static void mct_SendCtrlWrd(struct MCTStatStruc *pMCTstat, +- struct DCTStatStruc *pDCTstat, u32 val) ++ struct DCTStatStruc *pDCTstat, uint8_t dct, uint32_t val) + { +- uint8_t dct = 0; + u32 dev = pDCTstat->dev_dct; + +- if (pDCTstat->CSPresent_DCT[0] > 0) { +- dct = 0; +- } else if (pDCTstat->CSPresent_DCT[1] > 0 ){ +- dct = 1; +- } +- +- val |= Get_NB32_DCT(dev, dct, 0x7C) & ~0xFFFFFF; ++ val |= Get_NB32_DCT(dev, dct, 0x7c) & ~0xffffff; + val |= 1 << SendControlWord; +- Set_NB32_DCT(dev, dct, 0x7C, val); ++ Set_NB32_DCT(dev, dct, 0x7c, val); + + do { +- val = Get_NB32_DCT(dev, dct, 0x7C); ++ val = Get_NB32_DCT(dev, dct, 0x7c); + } while (val & (1 << SendControlWord)); + } + + void mct_DramControlReg_Init_D(struct MCTStatStruc *pMCTstat, +- struct DCTStatStruc *pDCTstat, u8 dct) ++ struct DCTStatStruc *pDCTstat, uint8_t dct) + { + u8 MrsChipSel; + u32 dev = pDCTstat->dev_dct; +@@ -163,7 +236,7 @@ void mct_DramControlReg_Init_D(struct MCTStatStruc *pMCTstat, + for (MrsChipSel = 0; MrsChipSel < 8; MrsChipSel ++, MrsChipSel ++) { + if (pDCTstat->CSPresent & (1 << MrsChipSel)) { + val = Get_NB32_DCT(dev, dct, 0xa8); +- val &= ~(0xF << 8); ++ val &= ~(0xf << 8); + + switch (MrsChipSel) { + case 0: +@@ -184,8 +257,8 @@ void mct_DramControlReg_Init_D(struct MCTStatStruc *pMCTstat, + for (cw=0; cw <=15; cw ++) { + mct_Wait(1600); + if (!(cw==6 || cw==7)) { +- val = mct_ControlRC(pMCTstat, pDCTstat, MrsChipSel << 20, cw); +- mct_SendCtrlWrd(pMCTstat, pDCTstat, val); ++ val = mct_ControlRC(pMCTstat, pDCTstat, dct, MrsChipSel << rc_word_chip_select_lower_bit(), cw); ++ mct_SendCtrlWrd(pMCTstat, pDCTstat, dct, val); + } + } + } +@@ -195,7 +268,7 @@ void mct_DramControlReg_Init_D(struct MCTStatStruc *pMCTstat, + } + + void FreqChgCtrlWrd(struct MCTStatStruc *pMCTstat, +- struct DCTStatStruc *pDCTstat) ++ struct DCTStatStruc *pDCTstat, uint8_t dct) + { + u32 SaveSpeed = pDCTstat->DIMMAutoSpeed; + u32 MrsChipSel; +@@ -208,10 +281,10 @@ void FreqChgCtrlWrd(struct MCTStatStruc *pMCTstat, + for (MrsChipSel=0; MrsChipSel < 8; MrsChipSel++, MrsChipSel++) { + if (pDCTstat->CSPresent & (1 << MrsChipSel)) { + /* 2. Program F2x[1, 0]A8[CtrlWordCS]=bit mask for target chip selects. */ +- val = Get_NB32_DCT(dev, 0, 0xA8); /* TODO: dct 0 / 1 select */ +- val &= ~(0xFF << 8); +- val |= (0x3 << (MrsChipSel & 0xFE)) << 8; +- Set_NB32_DCT(dev, 0, 0xA8, val); /* TODO: dct 0 / 1 select */ ++ val = Get_NB32_DCT(dev, dct, 0xa8); ++ val &= ~(0xff << 8); ++ val |= (0x3 << (MrsChipSel & 0xfe)) << 8; ++ Set_NB32_DCT(dev, dct, 0xa8, val); + + /* Resend control word 10 */ + uint8_t freq_ctl_val = 0; +@@ -235,21 +308,21 @@ void FreqChgCtrlWrd(struct MCTStatStruc *pMCTstat, + break; + } + +- printk(BIOS_SPEW, "Preparing to send DIMM RC%d: %02x\n", 10, freq_ctl_val); ++ printk(BIOS_SPEW, "Preparing to send DCT %d DIMM RC%d: %02x\n", dct, 10, freq_ctl_val); + +- mct_SendCtrlWrd(pMCTstat, pDCTstat, MrsChipSel << 20 | 0x40002 | rc_word_value_to_ctl_bits(freq_ctl_val)); ++ mct_SendCtrlWrd(pMCTstat, pDCTstat, dct, MrsChipSel << rc_word_chip_select_lower_bit() | rc_word_address_to_ctl_bits(10) | rc_word_value_to_ctl_bits(freq_ctl_val)); + + mct_Wait(1600); + + /* Resend control word 2 */ +- val = mct_ControlRC(pMCTstat, pDCTstat, MrsChipSel << 20, 2); +- mct_SendCtrlWrd(pMCTstat, pDCTstat, val); ++ val = mct_ControlRC(pMCTstat, pDCTstat, dct, MrsChipSel << rc_word_chip_select_lower_bit(), 2); ++ mct_SendCtrlWrd(pMCTstat, pDCTstat, dct, val); + + mct_Wait(1600); + + /* Resend control word 8 */ +- val = mct_ControlRC(pMCTstat, pDCTstat, MrsChipSel << 20, 8); +- mct_SendCtrlWrd(pMCTstat, pDCTstat, val); ++ val = mct_ControlRC(pMCTstat, pDCTstat, dct, MrsChipSel << rc_word_chip_select_lower_bit(), 8); ++ mct_SendCtrlWrd(pMCTstat, pDCTstat, dct, val); + + mct_Wait(1600); + } +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c b/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c +index 9ccf77e..09a5f68 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c +@@ -445,13 +445,13 @@ static u32 mct_MR2(struct MCTStatStruc *pMCTstat, + u32 dev = pDCTstat->dev_dct; + u32 dword, ret; + ++ /* The formula for chip select number is: CS = dimm*2+rank */ ++ uint8_t dimm = MrsChipSel / 2; ++ uint8_t rank = MrsChipSel % 2; ++ + if (is_fam15h()) { + uint8_t package_type = mctGet_NVbits(NV_PACK_TYPE); + +- /* The formula for chip select number is: CS = dimm*2+rank */ +- uint8_t dimm = MrsChipSel / 2; +- uint8_t rank = MrsChipSel % 2; +- + /* FIXME: These parameters should be configurable + * For now, err on the side of caution and enable automatic 2x refresh + * when the DDR temperature rises above the internal limits +@@ -496,7 +496,7 @@ static u32 mct_MR2(struct MCTStatStruc *pMCTstat, + ret |= ((dword >> 10) & 3) << 9; + } + +- printk(BIOS_SPEW, "Going to send MR2 control word %08x\n", ret); ++ printk(BIOS_SPEW, "Going to send DCT %d DIMM %d rank %d MR2 control word %08x\n", dct, dimm, rank, ret); + + return ret; + } +@@ -507,6 +507,10 @@ static u32 mct_MR3(struct MCTStatStruc *pMCTstat, + u32 dev = pDCTstat->dev_dct; + u32 dword, ret; + ++ /* The formula for chip select number is: CS = dimm*2+rank */ ++ uint8_t dimm = MrsChipSel / 2; ++ uint8_t rank = MrsChipSel % 2; ++ + if (is_fam15h()) { + ret = 0xc0000; + ret |= (MrsChipSel << 21); +@@ -527,7 +531,7 @@ static u32 mct_MR3(struct MCTStatStruc *pMCTstat, + ret |= (dword >> 24) & 7; + } + +- printk(BIOS_SPEW, "Going to send MR3 control word %08x\n", ret); ++ printk(BIOS_SPEW, "Going to send DCT %d DIMM %d rank %d MR3 control word %08x\n", dct, dimm, rank, ret); + + return ret; + } +@@ -538,6 +542,10 @@ static u32 mct_MR1(struct MCTStatStruc *pMCTstat, + u32 dev = pDCTstat->dev_dct; + u32 dword, ret; + ++ /* The formula for chip select number is: CS = dimm*2+rank */ ++ uint8_t dimm = MrsChipSel / 2; ++ uint8_t rank = MrsChipSel % 2; ++ + if (is_fam15h()) { + uint8_t package_type = mctGet_NVbits(NV_PACK_TYPE); + +@@ -553,10 +561,6 @@ static u32 mct_MR1(struct MCTStatStruc *pMCTstat, + ret = 0x40000; + ret |= (MrsChipSel << 21); + +- /* The formula for chip select number is: CS = dimm*2+rank */ +- uint8_t dimm = MrsChipSel / 2; +- uint8_t rank = MrsChipSel % 2; +- + /* Determine if TQDS should be set */ + if ((pDCTstat->Dimmx8Present & (1 << dimm)) + && (((dimm & 0x1)?(pDCTstat->Dimmx4Present&0x55):(pDCTstat->Dimmx4Present&0xaa)) != 0x0) +@@ -623,7 +627,7 @@ static u32 mct_MR1(struct MCTStatStruc *pMCTstat, + ret |= 1 << 12; + } + +- printk(BIOS_SPEW, "Going to send MR1 control word %08x\n", ret); ++ printk(BIOS_SPEW, "Going to send DCT %d DIMM %d rank %d MR1 control word %08x\n", dct, dimm, rank, ret); + + return ret; + } +@@ -634,6 +638,10 @@ static u32 mct_MR0(struct MCTStatStruc *pMCTstat, + u32 dev = pDCTstat->dev_dct; + u32 dword, ret, dword2; + ++ /* The formula for chip select number is: CS = dimm*2+rank */ ++ uint8_t dimm = MrsChipSel / 2; ++ uint8_t rank = MrsChipSel % 2; ++ + if (is_fam15h()) { + ret = 0x00000; + ret |= (MrsChipSel << 21); +@@ -744,7 +752,7 @@ static u32 mct_MR0(struct MCTStatStruc *pMCTstat, + ret |= 1 << 8; + } + +- printk(BIOS_SPEW, "Going to send MR0 control word %08x\n", ret); ++ printk(BIOS_SPEW, "Going to send DCT %d DIMM %d rank %d MR0 control word %08x\n", dct, dimm, rank, ret); + + return ret; + } +@@ -811,6 +819,16 @@ void mct_DramInit_Sw_D(struct MCTStatStruc *pMCTstat, + /* 8.wait 360ns */ + mct_Wait(80); + ++ /* Set up address parity */ ++ if ((pDCTstat->Status & (1 << SB_Registered)) ++ || (pDCTstat->Status & (1 << SB_LoadReduced))) { ++ if (is_fam15h()) { ++ dword = Get_NB32_DCT(dev, dct, 0x90); ++ dword |= 1 << ParEn; ++ Set_NB32_DCT(dev, dct, 0x90, dword); ++ } ++ } ++ + /* The following steps are performed with registered DIMMs only and + * must be done for each chip select pair */ + if (pDCTstat->Status & (1 << SB_Registered)) +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c +index 981f467..707e6a9 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c +@@ -1146,8 +1146,10 @@ static void dqsTrainRcvrEn_SW_Fam15(struct MCTStatStruc *pMCTstat, + uint8_t dimm; + uint8_t rank; + uint8_t lane; ++ uint8_t nibble; + uint8_t mem_clk; + uint16_t initial_seed; ++ uint8_t train_both_nibbles; + uint16_t current_total_delay[MAX_BYTE_LANES]; + uint16_t dqs_ret_pass1_total_delay[MAX_BYTE_LANES]; + uint16_t rank0_current_total_delay[MAX_BYTE_LANES]; +@@ -1163,6 +1165,11 @@ static void dqsTrainRcvrEn_SW_Fam15(struct MCTStatStruc *pMCTstat, + print_debug_dqs("\nTrainRcvEn: Node", pDCTstat->Node_ID, 0); + print_debug_dqs("TrainRcvEn: Pass", Pass, 0); + ++ train_both_nibbles = 0; ++ if (pDCTstat->Dimmx4Present) ++ if (is_fam15h()) ++ train_both_nibbles = 1; ++ + dev = pDCTstat->dev_dct; + index_reg = 0x98; + ch_start = 0; +@@ -1245,132 +1252,148 @@ static void dqsTrainRcvrEn_SW_Fam15(struct MCTStatStruc *pMCTstat, + else + _2Ranks = 0; + for (rank = 0; rank < (_2Ranks + 1); rank++) { +- /* 2.10.5.8.2 (1) +- * Specify the target DIMM to be trained +- * Set TrNibbleSel = 0 +- * +- * TODO: Add support for x4 DIMMs +- */ +- dword = Get_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008); +- dword &= ~(0x3 << 4); /* TrDimmSel */ +- dword |= ((dimm & 0x3) << 4); +- dword &= ~(0x1 << 2); /* TrNibbleSel */ +- Set_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008, dword); +- +- /* 2.10.5.8.2 (2) +- * Retrieve gross and fine timing fields from write DQS registers +- */ +- read_dqs_write_timing_control_registers(current_total_delay, dev, Channel, dimm, index_reg); ++ for (nibble = 0; nibble < (train_both_nibbles + 1); nibble++) { ++ /* 2.10.5.8.2 (1) ++ * Specify the target DIMM and nibble to be trained ++ */ ++ dword = Get_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008); ++ dword &= ~(0x3 << 4); /* TrDimmSel = dimm */ ++ dword |= ((dimm & 0x3) << 4); ++ dword &= ~(0x1 << 2); /* TrNibbleSel = nibble */ ++ dword |= ((nibble & 0x1) << 2); ++ Set_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008, dword); ++ ++ /* 2.10.5.8.2 (2) ++ * Retrieve gross and fine timing fields from write DQS registers ++ */ ++ read_dqs_write_timing_control_registers(current_total_delay, dev, Channel, dimm, index_reg); + +- /* 2.10.5.8.2.1 +- * Generate the DQS Receiver Enable Training Seed Values +- */ +- if (Pass == FirstPass) { +- initial_seed = fam15_receiver_enable_training_seed(pDCTstat, Channel, dimm, rank, package_type); ++ /* 2.10.5.8.2.1 ++ * Generate the DQS Receiver Enable Training Seed Values ++ */ ++ if (Pass == FirstPass) { ++ initial_seed = fam15_receiver_enable_training_seed(pDCTstat, Channel, dimm, rank, package_type); + +- /* Adjust seed for the minimum platform supported frequency */ +- initial_seed = (uint16_t) (((((uint64_t) initial_seed) * +- fam15h_freq_tab[mem_clk] * 100) / (mctGet_NVbits(NV_MIN_MEMCLK) * 100))); ++ /* Adjust seed for the minimum platform supported frequency */ ++ initial_seed = (uint16_t) (((((uint64_t) initial_seed) * ++ fam15h_freq_tab[mem_clk] * 100) / (mctGet_NVbits(NV_MIN_MEMCLK) * 100))); + +- for (lane = 0; lane < MAX_BYTE_LANES; lane++) { +- uint16_t wl_pass1_delay; +- wl_pass1_delay = current_total_delay[lane]; ++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) { ++ uint16_t wl_pass1_delay; ++ wl_pass1_delay = current_total_delay[lane]; + +- seed[lane] = initial_seed + wl_pass1_delay; +- } +- } else { +- uint8_t addr_prelaunch = 0; /* TODO: Fetch the correct value from RC2[0] */ +- uint16_t register_delay; +- int16_t seed_prescaling; +- +- memcpy(current_total_delay, dqs_ret_pass1_total_delay, sizeof(current_total_delay)); +- if ((pDCTstat->Status & (1 << SB_Registered))) { +- if (addr_prelaunch) +- register_delay = 0x30; +- else +- register_delay = 0x20; +- } else if ((pDCTstat->Status & (1 << SB_LoadReduced))) { +- /* TODO +- * Load reduced DIMM support unimplemented +- */ +- register_delay = 0x0; ++ seed[lane] = initial_seed + wl_pass1_delay; ++ } + } else { +- register_delay = 0x0; ++ uint8_t addr_prelaunch = 0; /* TODO: Fetch the correct value from RC2[0] */ ++ uint16_t register_delay; ++ int16_t seed_prescaling; ++ ++ memcpy(current_total_delay, dqs_ret_pass1_total_delay, sizeof(current_total_delay)); ++ if ((pDCTstat->Status & (1 << SB_Registered))) { ++ if (addr_prelaunch) ++ register_delay = 0x30; ++ else ++ register_delay = 0x20; ++ } else if ((pDCTstat->Status & (1 << SB_LoadReduced))) { ++ /* TODO ++ * Load reduced DIMM support unimplemented ++ */ ++ register_delay = 0x0; ++ } else { ++ register_delay = 0x0; ++ } ++ ++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) { ++ seed_prescaling = current_total_delay[lane] - register_delay - 0x20; ++ seed[lane] = (uint16_t) (register_delay + ((((uint64_t) seed_prescaling) * fam15h_freq_tab[mem_clk] * 100) / (mctGet_NVbits(NV_MIN_MEMCLK) * 100))); ++ } + } + + for (lane = 0; lane < MAX_BYTE_LANES; lane++) { +- seed_prescaling = current_total_delay[lane] - register_delay - 0x20; +- seed[lane] = (uint16_t) (register_delay + ((((uint64_t) seed_prescaling) * fam15h_freq_tab[mem_clk] * 100) / (mctGet_NVbits(NV_MIN_MEMCLK) * 100))); +- } +- } ++ seed_gross[lane] = (seed[lane] >> 5) & 0x1f; ++ seed_fine[lane] = seed[lane] & 0x1f; + +- for (lane = 0; lane < MAX_BYTE_LANES; lane++) { +- seed_gross[lane] = (seed[lane] >> 5) & 0x1f; +- seed_fine[lane] = seed[lane] & 0x1f; ++ /*if (seed_gross[lane] == 0) ++ seed_pre_gross[lane] = 0; ++ else */if (seed_gross[lane] & 0x1) ++ seed_pre_gross[lane] = 1; ++ else ++ seed_pre_gross[lane] = 2; + +- /*if (seed_gross[lane] == 0) +- seed_pre_gross[lane] = 0; +- else */if (seed_gross[lane] & 0x1) +- seed_pre_gross[lane] = 1; +- else +- seed_pre_gross[lane] = 2; ++ /* Calculate phase recovery delays */ ++ phase_recovery_delays[lane] = ((seed_pre_gross[lane] & 0x1f) << 5) | (seed_fine[lane] & 0x1f); + +- /* Calculate phase recovery delays */ +- phase_recovery_delays[lane] = ((seed_pre_gross[lane] & 0x1f) << 5) | (seed_fine[lane] & 0x1f); ++ /* Set the gross delay. ++ * NOTE: While the BKDG states to only program DqsRcvEnGrossDelay, this appears ++ * to have been a misprint as DqsRcvEnFineDelay should be set to zero as well. ++ */ ++ current_total_delay[lane] = ((seed_gross[lane] & 0x1f) << 5); ++ } + +- /* Set the gross delay. +- * NOTE: While the BKDG states to only program DqsRcvEnGrossDelay, this appears +- * to have been a misprint as DqsRcvEnFineDelay should be set to zero as well. ++ /* 2.10.5.8.2 (2) / 2.10.5.8.2.1 (5 6) ++ * Program PhRecFineDly and PhRecGrossDly + */ +- current_total_delay[lane] = ((seed_gross[lane] & 0x1f) << 5); +- } ++ write_dram_phase_recovery_control_registers(phase_recovery_delays, dev, Channel, dimm, index_reg); + +- /* 2.10.5.8.2 (2) / 2.10.5.8.2.1 (5 6) +- * Program PhRecFineDly and PhRecGrossDly +- */ +- write_dram_phase_recovery_control_registers(phase_recovery_delays, dev, Channel, dimm, index_reg); ++ /* 2.10.5.8.2 (2) / 2.10.5.8.2.1 (7) ++ * Program the DQS Receiver Enable delay values for each lane ++ */ ++ write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg); + +- /* 2.10.5.8.2 (2) / 2.10.5.8.2.1 (7) +- * Program the DQS Receiver Enable delay values for each lane +- */ +- write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg); ++ /* 2.10.5.8.2 (3) ++ * Program DqsRcvTrEn = 1 ++ */ ++ dword = Get_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008); ++ dword |= (0x1 << 13); ++ Set_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008, dword); + +- /* 2.10.5.8.2 (3) +- * Program DqsRcvTrEn = 1 +- */ +- dword = Get_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008); +- dword |= (0x1 << 13); +- Set_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008, dword); ++ /* 2.10.5.8.2 (4) ++ * Issue 192 read requests to the target rank ++ */ ++ generate_dram_receiver_enable_training_pattern_fam15(pMCTstat, pDCTstat, Channel, Receiver + (rank & 0x1)); + +- /* 2.10.5.8.2 (4) +- * Issue 192 read requests to the target rank +- */ +- generate_dram_receiver_enable_training_pattern_fam15(pMCTstat, pDCTstat, Channel, Receiver + (rank & 0x1)); ++ /* 2.10.5.8.2 (5) ++ * Program DqsRcvTrEn = 0 ++ */ ++ dword = Get_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008); ++ dword &= ~(0x1 << 13); ++ Set_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008, dword); + +- /* 2.10.5.8.2 (5) +- * Program DqsRcvTrEn = 0 +- */ +- dword = Get_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008); +- dword &= ~(0x1 << 13); +- Set_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008, dword); ++ /* 2.10.5.8.2 (6) ++ * Read PhRecGrossDly, PhRecFineDly ++ */ ++ read_dram_phase_recovery_control_registers(phase_recovery_delays, dev, Channel, dimm, index_reg); + +- /* 2.10.5.8.2 (6) +- * Read PhRecGrossDly, PhRecFineDly +- */ +- read_dram_phase_recovery_control_registers(phase_recovery_delays, dev, Channel, dimm, index_reg); ++ /* 2.10.5.8.2 (7) ++ * Calculate and program the DQS Receiver Enable delay values ++ */ ++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) { ++ current_total_delay[lane] = (phase_recovery_delays[lane] & 0x1f); ++ current_total_delay[lane] |= ((seed_gross[lane] + ((phase_recovery_delays[lane] >> 5) & 0x1f) - seed_pre_gross[lane] + 1) << 5); ++ if (nibble == 0) { ++ if (lane == 8) ++ pDCTstat->CH_D_BC_RCVRDLY[Channel][dimm] = current_total_delay[lane]; ++ else ++ pDCTstat->CH_D_B_RCVRDLY[Channel][dimm][lane] = current_total_delay[lane]; ++ } else { ++ /* 2.10.5.8.2 (1) ++ * Average the trained values of both nibbles on x4 DIMMs ++ */ ++ if (lane == 8) ++ pDCTstat->CH_D_BC_RCVRDLY[Channel][dimm] = (pDCTstat->CH_D_BC_RCVRDLY[Channel][dimm] + current_total_delay[lane]) / 2; ++ else ++ pDCTstat->CH_D_B_RCVRDLY[Channel][dimm][lane] = (pDCTstat->CH_D_B_RCVRDLY[Channel][dimm][lane] + current_total_delay[lane]) / 2; ++ } ++ } + +- /* 2.10.5.8.2 (7) +- * Calculate and program the DQS Receiver Enable delay values +- */ +- for (lane = 0; lane < MAX_BYTE_LANES; lane++) { +- current_total_delay[lane] = (phase_recovery_delays[lane] & 0x1f); +- current_total_delay[lane] |= ((seed_gross[lane] + ((phase_recovery_delays[lane] >> 5) & 0x1f) - seed_pre_gross[lane] + 1) << 5); +- if (lane == 8) +- pDCTstat->CH_D_BC_RCVRDLY[Channel][dimm] = current_total_delay[lane]; +- else +- pDCTstat->CH_D_B_RCVRDLY[Channel][dimm][lane] = current_total_delay[lane]; ++#if DQS_TRAIN_DEBUG > 1 ++ for (lane = 0; lane < 8; lane++) ++ printk(BIOS_DEBUG, "\t\tTrainRcvEn55: Channel: %d dimm: %d nibble: %d lane %d current_total_delay: %04x CH_D_B_RCVRDLY: %04x\n", ++ Channel, dimm, nibble, lane, current_total_delay[lane], pDCTstat->CH_D_B_RCVRDLY[Channel][dimm][lane]); ++#endif ++ write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg); + } +- write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg); + + if (rank == 0) { + /* Back up the Rank 0 delays for later use */ +@@ -1395,7 +1418,7 @@ static void dqsTrainRcvrEn_SW_Fam15(struct MCTStatStruc *pMCTstat, + + #if DQS_TRAIN_DEBUG > 0 + for (lane = 0; lane < 8; lane++) +- print_debug_dqs_pair("\t\tTrainRcvEn55: Lane ", lane, " current_total_delay ", current_total_delay[lane], 2); ++ print_debug_dqs_pair("\t\tTrainRcvEn56: Lane ", lane, " current_total_delay ", current_total_delay[lane], 2); + #endif + } + } +@@ -1815,15 +1838,23 @@ void mctSetEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat, + } + + void phyAssistedMemFnceTraining(struct MCTStatStruc *pMCTstat, +- struct DCTStatStruc *pDCTstatA) ++ struct DCTStatStruc *pDCTstatA, int16_t single_node_number) + { + u8 Node = 0; + struct DCTStatStruc *pDCTstat; + + printk(BIOS_DEBUG, "%s: Start\n", __func__); + ++ uint8_t start_node = 0; ++ uint8_t end_node = MAX_NODES_SUPPORTED; ++ ++ if (single_node_number >= 0) { ++ start_node = single_node_number; ++ end_node = single_node_number; ++ } ++ + /* FIXME: skip for Ax */ +- for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) { ++ for (Node = start_node; Node < end_node; Node++) { + pDCTstat = pDCTstatA + Node; + if (!pDCTstat->NodePresent) + continue; +@@ -1847,6 +1878,8 @@ void phyAssistedMemFnceTraining(struct MCTStatStruc *pMCTstat, + if (!pDCTstat->DIMMValidDCT[dct]) + continue; + ++ printk(BIOS_SPEW, "%s: training node %d DCT %d\n", __func__, Node, dct); ++ + /* Back up D18F2x9C_x0000_0004_dct[1:0] */ + datc_backup = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000004); + +@@ -1985,6 +2018,8 @@ void phyAssistedMemFnceTraining(struct MCTStatStruc *pMCTstat, + + /* Restore D18F2x9C_x0000_0004_dct[1:0] */ + Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000004, datc_backup); ++ ++ printk(BIOS_SPEW, "%s: done training node %d DCT %d\n", __func__, Node, dct); + } + } else { + fenceDynTraining_D(pMCTstat, pDCTstat, 0); +@@ -1997,7 +2032,7 @@ void phyAssistedMemFnceTraining(struct MCTStatStruc *pMCTstat, + } + + static uint32_t fenceDynTraining_D(struct MCTStatStruc *pMCTstat, +- struct DCTStatStruc *pDCTstat, u8 dct) ++ struct DCTStatStruc *pDCTstat, uint8_t dct) + { + u16 avRecValue; + u32 val; +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c b/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c +index 6b63ba0..3153e46 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c +@@ -19,7 +19,7 @@ + */ + + static void FreqChgCtrlWrd(struct MCTStatStruc *pMCTstat, +- struct DCTStatStruc *pDCTstat); ++ struct DCTStatStruc *pDCTstat, uint8_t dct); + + + static void AgesaDelay(u32 msec) +@@ -353,11 +353,14 @@ static void ExitSelfRefresh(struct MCTStatStruc *pMCTstat, + } + + void SetTargetFreq(struct MCTStatStruc *pMCTstat, +- struct DCTStatStruc *pDCTstat) ++ struct DCTStatStruc *pDCTstatA, uint8_t Node) + { + uint32_t dword; + uint8_t package_type = mctGet_NVbits(NV_PACK_TYPE); + ++ struct DCTStatStruc *pDCTstat; ++ pDCTstat = pDCTstatA + Node; ++ + if (is_fam15h()) { + /* Program F2x[1, 0]90[DisDllShutDownSR]=1. */ + if (pDCTstat->DIMMValidDCT[0]) { +@@ -391,7 +394,7 @@ void SetTargetFreq(struct MCTStatStruc *pMCTstat, + uint8_t dct; + for (dct = 0; dct < 2; dct++) { + if (pDCTstat->DIMMValidDCT[dct]) { +- phyAssistedMemFnceTraining(pMCTstat, pDCTstat); ++ phyAssistedMemFnceTraining(pMCTstat, pDCTstatA, Node); + InitPhyCompensation(pMCTstat, pDCTstat, dct); + } + } +@@ -438,7 +441,12 @@ void SetTargetFreq(struct MCTStatStruc *pMCTstat, + else + pDCTstat->CSPresent = pDCTstat->CSPresent_DCT[1]; + +- FreqChgCtrlWrd(pMCTstat, pDCTstat); ++ if (pDCTstat->DIMMValidDCT[0]) { ++ FreqChgCtrlWrd(pMCTstat, pDCTstat, 0); ++ } ++ if (pDCTstat->DIMMValidDCT[1]) { ++ FreqChgCtrlWrd(pMCTstat, pDCTstat, 1); ++ } + } + } + +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c +index e5e4031..73b231e 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c +@@ -35,9 +35,9 @@ u32 swapBankBits(struct DCTStatStruc *pDCTstat, uint8_t dct, uint32_t MRSValue); + void prepareDimms(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, + u8 dct, u8 dimm, BOOL wl); + void programODT(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, uint8_t dct, u8 dimm); +-void procConfig(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, uint8_t dct, u8 dimm, u8 pass); ++void procConfig(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t dimm, uint8_t pass, uint8_t nibble); + void setWLByteDelay(struct DCTStatStruc *pDCTstat, uint8_t dct, u8 ByteLane, u8 dimm, u8 targetAddr, uint8_t pass); +-void getWLByteDelay(struct DCTStatStruc *pDCTstat, uint8_t dct, u8 ByteLane, u8 dimm, uint8_t pass); ++void getWLByteDelay(struct DCTStatStruc *pDCTstat, uint8_t dct, u8 ByteLane, u8 dimm, uint8_t pass, uint8_t nibble); + + static int32_t abs(int32_t val) { + if (val < 0) +@@ -76,6 +76,8 @@ uint8_t AgesaHwWlPhase1(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCT + { + u8 ByteLane; + u32 Value, Addr; ++ uint8_t nibble = 0; ++ uint8_t train_both_nibbles; + u16 Addl_Data_Offset, Addl_Data_Port; + sMCTStruct *pMCTData = pDCTstat->C_MCTPtr; + sDCTStruct *pDCTData = pDCTstat->C_DCTPtr[dct]; +@@ -88,98 +90,108 @@ uint8_t AgesaHwWlPhase1(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCT + DRAM_ADD_DCT_PHY_CONTROL_REG, TrDimmSelStart, + TrDimmSelEnd, (u32)dimm); + +- if (is_fam15h()) { +- /* Set TrNibbleSel = 0 +- * +- * TODO: Add support for x4 DIMMs +- */ +- set_DCT_ADDR_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT, +- DRAM_ADD_DCT_PHY_CONTROL_REG, 2, +- 2, (u32)0); +- } ++ train_both_nibbles = 0; ++ if (pDCTstat->Dimmx4Present) ++ if (is_fam15h()) ++ train_both_nibbles = 1; + +- /* 2. Prepare the DIMMs for write levelization using DDR3-defined +- * MR commands. */ +- prepareDimms(pMCTstat, pDCTstat, dct, dimm, TRUE); ++ for (nibble = 0; nibble < (train_both_nibbles + 1); nibble++) { ++ printk(BIOS_SPEW, "AgesaHwWlPhase1: training nibble %d\n", nibble); + +- /* 3. After the DIMMs are configured, BIOS waits 40 MEMCLKs to +- * satisfy DDR3-defined internal DRAM timing. +- */ +- if (is_fam15h()) +- precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 40); +- else +- pMCTData->AgesaDelay(40); ++ if (is_fam15h()) { ++ /* Program F2x[1, 0]9C_x08[WrtLvTrEn]=0 */ ++ set_DCT_ADDR_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT, ++ DRAM_ADD_DCT_PHY_CONTROL_REG, WrtLvTrEn, WrtLvTrEn, 0); ++ ++ /* Set TrNibbleSel */ ++ set_DCT_ADDR_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT, ++ DRAM_ADD_DCT_PHY_CONTROL_REG, 2, ++ 2, (uint32_t)nibble); ++ } + +- /* 4. Configure the processor's DDR phy for write levelization training: */ +- procConfig(pMCTstat, pDCTstat, dct, dimm, pass); ++ /* 2. Prepare the DIMMs for write levelization using DDR3-defined ++ * MR commands. */ ++ prepareDimms(pMCTstat, pDCTstat, dct, dimm, TRUE); + +- /* 5. Begin write levelization training: +- * Program F2x[1, 0]9C_x08[WrtLvTrEn]=1. */ +- if (pDCTData->LogicalCPUID & (AMD_DR_Cx | AMD_DR_Dx | AMD_FAM15_ALL)) +- { +- set_DCT_ADDR_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT, +- DRAM_ADD_DCT_PHY_CONTROL_REG, WrtLvTrEn, WrtLvTrEn, 1); +- } +- else +- { +- /* Broadcast write to all D3Dbyte chipset register offset 0xc +- * Set bit 0 (wrTrain) +- * Program bit 4 to nibble being trained (only matters for x4dimms) +- * retain value of 3:2 (Trdimmsel) +- * reset bit 5 (FrzPR) ++ /* 3. After the DIMMs are configured, BIOS waits 40 MEMCLKs to ++ * satisfy DDR3-defined internal DRAM timing. + */ +- if (dct) ++ if (is_fam15h()) ++ precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 40); ++ else ++ pMCTData->AgesaDelay(40); ++ ++ /* 4. Configure the processor's DDR phy for write levelization training: */ ++ procConfig(pMCTstat, pDCTstat, dct, dimm, pass, nibble); ++ ++ /* 5. Begin write levelization training: ++ * Program F2x[1, 0]9C_x08[WrtLvTrEn]=1. */ ++ if (pDCTData->LogicalCPUID & (AMD_DR_Cx | AMD_DR_Dx | AMD_FAM15_ALL)) + { +- Addl_Data_Offset=0x198; +- Addl_Data_Port=0x19C; ++ set_DCT_ADDR_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT, ++ DRAM_ADD_DCT_PHY_CONTROL_REG, WrtLvTrEn, WrtLvTrEn, 1); + } + else + { +- Addl_Data_Offset=0x98; +- Addl_Data_Port=0x9C; ++ /* Broadcast write to all D3Dbyte chipset register offset 0xc ++ * Set bit 0 (wrTrain) ++ * Program bit 4 to nibble being trained (only matters for x4dimms) ++ * retain value of 3:2 (Trdimmsel) ++ * reset bit 5 (FrzPR) ++ */ ++ if (dct) ++ { ++ Addl_Data_Offset=0x198; ++ Addl_Data_Port=0x19C; ++ } ++ else ++ { ++ Addl_Data_Offset=0x98; ++ Addl_Data_Port=0x9C; ++ } ++ Addr=0x0D00000C; ++ AmdMemPCIWriteBits(MAKE_SBDFO(0,0,24+(pDCTData->NodeId),FUN_DCT,Addl_Data_Offset), 31, 0, &Addr); ++ while ((get_Bits(pDCTData,FUN_DCT,pDCTData->NodeId, FUN_DCT, Addl_Data_Offset, ++ DctAccessDone, DctAccessDone)) == 0); ++ AmdMemPCIReadBits(MAKE_SBDFO(0,0,24+(pDCTData->NodeId),FUN_DCT,Addl_Data_Port), 31, 0, &Value); ++ Value = bitTestSet(Value, 0); /* enable WL training */ ++ Value = bitTestReset(Value, 4); /* for x8 only */ ++ Value = bitTestReset(Value, 5); /* for hardware WL training */ ++ AmdMemPCIWriteBits(MAKE_SBDFO(0,0,24+(pDCTData->NodeId),FUN_DCT,Addl_Data_Port), 31, 0, &Value); ++ Addr=0x4D030F0C; ++ AmdMemPCIWriteBits(MAKE_SBDFO(0,0,24+(pDCTData->NodeId),FUN_DCT,Addl_Data_Offset), 31, 0, &Addr); ++ while ((get_Bits(pDCTData,FUN_DCT,pDCTData->NodeId, FUN_DCT, Addl_Data_Offset, ++ DctAccessDone, DctAccessDone)) == 0); + } +- Addr=0x0D00000C; +- AmdMemPCIWriteBits(MAKE_SBDFO(0,0,24+(pDCTData->NodeId),FUN_DCT,Addl_Data_Offset), 31, 0, &Addr); +- while ((get_Bits(pDCTData,FUN_DCT,pDCTData->NodeId, FUN_DCT, Addl_Data_Offset, +- DctAccessDone, DctAccessDone)) == 0); +- AmdMemPCIReadBits(MAKE_SBDFO(0,0,24+(pDCTData->NodeId),FUN_DCT,Addl_Data_Port), 31, 0, &Value); +- Value = bitTestSet(Value, 0); /* enable WL training */ +- Value = bitTestReset(Value, 4); /* for x8 only */ +- Value = bitTestReset(Value, 5); /* for hardware WL training */ +- AmdMemPCIWriteBits(MAKE_SBDFO(0,0,24+(pDCTData->NodeId),FUN_DCT,Addl_Data_Port), 31, 0, &Value); +- Addr=0x4D030F0C; +- AmdMemPCIWriteBits(MAKE_SBDFO(0,0,24+(pDCTData->NodeId),FUN_DCT,Addl_Data_Offset), 31, 0, &Addr); +- while ((get_Bits(pDCTData,FUN_DCT,pDCTData->NodeId, FUN_DCT, Addl_Data_Offset, +- DctAccessDone, DctAccessDone)) == 0); +- } + +- if (is_fam15h()) +- proc_MFENCE(); ++ if (is_fam15h()) ++ proc_MFENCE(); + +- /* Wait 200 MEMCLKs. If executing pass 2, wait 32 MEMCLKs. */ +- if (is_fam15h()) +- precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 200); +- else +- pMCTData->AgesaDelay(140); ++ /* Wait 200 MEMCLKs. If executing pass 2, wait 32 MEMCLKs. */ ++ if (is_fam15h()) ++ precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 200); ++ else ++ pMCTData->AgesaDelay(140); + +- /* Program F2x[1, 0]9C_x08[WrtLevelTrEn]=0. */ +- set_DCT_ADDR_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT, +- DRAM_ADD_DCT_PHY_CONTROL_REG, WrtLvTrEn, WrtLvTrEn, 0); ++ /* Program F2x[1, 0]9C_x08[WrtLevelTrEn]=0. */ ++ set_DCT_ADDR_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT, ++ DRAM_ADD_DCT_PHY_CONTROL_REG, WrtLvTrEn, WrtLvTrEn, 0); + +- /* Read from registers F2x[1, 0]9C_x[51:50] and F2x[1, 0]9C_x52 +- * to get the gross and fine delay settings +- * for the target DIMM and save these values. */ +- for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) { +- getWLByteDelay(pDCTstat, dct, ByteLane, dimm, pass); +- } ++ /* Read from registers F2x[1, 0]9C_x[51:50] and F2x[1, 0]9C_x52 ++ * to get the gross and fine delay settings ++ * for the target DIMM and save these values. */ ++ for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) { ++ getWLByteDelay(pDCTstat, dct, ByteLane, dimm, pass, nibble); ++ } + +- pDCTData->WLCriticalGrossDelayPrevPass = 0x1f; ++ pDCTData->WLCriticalGrossDelayPrevPass = 0x0; ++ } + + return 0; + } + + uint8_t AgesaHwWlPhase2(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, +- u8 dct, u8 dimm, u8 pass) ++ uint8_t dct, uint8_t dimm, uint8_t pass) + { + u8 ByteLane; + uint8_t status = 0; +@@ -190,6 +202,12 @@ uint8_t AgesaHwWlPhase2(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCT + int32_t cgd = pDCTData->WLCriticalGrossDelayPrevPass; + uint8_t index = (uint8_t)(MAX_BYTE_LANES * dimm); + ++ printk(BIOS_SPEW, "\toriginal critical gross delay: %d\n", cgd); ++ ++ /* FIXME ++ * For now, disable CGD adjustment as it seems to interfere with registered DIMM training ++ */ ++ + /* Calculate the Critical Gross Delay */ + for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) { + /* Calculate the gross delay differential for this lane */ +@@ -205,6 +223,8 @@ uint8_t AgesaHwWlPhase2(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCT + cgd = gross_diff[ByteLane]; + } + ++ printk(BIOS_SPEW, "\tnew critical gross delay: %d\n", cgd); ++ + pDCTData->WLCriticalGrossDelayPrevPass = cgd; + + if (pDCTstat->Speed != pDCTstat->TargetFreq) { +@@ -281,7 +301,7 @@ uint8_t AgesaHwWlPhase3(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCT + gross_diff[ByteLane] = pDCTData->WLSeedGrossDelay[index+ByteLane] + pDCTData->WLGrossDelay[index+ByteLane]; + gross_diff[ByteLane] -= pDCTData->WLSeedPreGrossDelay[index+ByteLane]; + +- /* Prevent underflow in the presence of noise / instability*/ ++ /* Prevent underflow in the presence of noise / instability */ + if (gross_diff[ByteLane] < cgd) + gross_diff[ByteLane] = cgd; + +@@ -289,7 +309,8 @@ uint8_t AgesaHwWlPhase3(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCT + } + } else { + dword = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0xa8); +- dword &= ~(0x3 << 24); /* WrDqDqsEarly = 0 */ ++ dword &= ~(0x3 << 24); /* WrDqDqsEarly = pDCTData->WrDqsGrossDlyBaseOffset */ ++ dword |= ((pDCTData->WrDqsGrossDlyBaseOffset & 0x3) << 24); + Set_NB32_DCT(pDCTstat->dev_dct, dct, 0xa8, dword); + } + } +@@ -959,7 +980,7 @@ static uint16_t fam15h_next_lowest_memclk_freq(uint16_t memclk_freq) + #endif + + /*----------------------------------------------------------------------------- +- * void procConfig(MCTStruct *MCTData,DCTStruct *DCTData, u8 Dimm, u8 Pass) ++ * void procConfig(MCTStruct *MCTData,DCTStruct *DCTData, u8 Dimm, u8 Pass, u8 Nibble) + * + * Description: + * This function programs the ODT values for the NB +@@ -972,13 +993,14 @@ static uint16_t fam15h_next_lowest_memclk_freq(uint16_t memclk_freq) + * OUT + * ---------------------------------------------------------------------------- + */ +-void procConfig(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, uint8_t dct, u8 dimm, u8 pass) ++void procConfig(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t dimm, uint8_t pass, uint8_t nibble) + { + u8 ByteLane, MemClkFreq; + int32_t Seed_Gross; + int32_t Seed_Fine; + uint8_t Seed_PreGross; + u32 Value, Addr; ++ uint32_t dword; + u16 Addl_Data_Offset, Addl_Data_Port; + sMCTStruct *pMCTData = pDCTstat->C_MCTPtr; + sDCTStruct *pDCTData = pDCTstat->C_DCTPtr[dct]; +@@ -1048,10 +1070,17 @@ void procConfig(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, ui + uint8_t AddrCmdPrelaunch = 0; /* TODO: Fetch the correct value from RC2[0] */ + uint8_t package_type = mctGet_NVbits(NV_PACK_TYPE); + uint16_t Seed_Total = 0; ++ pDCTData->WrDqsGrossDlyBaseOffset = 0x0; + if (package_type == PT_GR) { + /* Socket G34: Fam15h BKDG v3.14 Table 96 */ + if (pDCTData->Status[DCT_STATUS_REGISTERED]) { ++ /* TODO ++ * Implement mainboard-specific seed and ++ * WrDqsGrossDly base overrides. ++ * 0x41 and 0x0 are the "stock" values ++ */ + Seed_Total = 0x41; ++ pDCTData->WrDqsGrossDlyBaseOffset = 0x2; + } else if (pDCTData->Status[DCT_STATUS_LOAD_REDUCED]) { + Seed_Total = 0x0; + } else { +@@ -1133,15 +1162,16 @@ void procConfig(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, ui + printk(BIOS_SPEW, "\tLane %02x initial seed: %04x\n", ByteLane, ((Seed_Gross & 0x1f) << 5) | (Seed_Fine & 0x1f)); + } + } else { +- /* Pass 2 */ +- /* From BKDG, Write Leveling Seed Value. */ +- if (is_fam15h()) { +- uint32_t RegisterDelay; +- int32_t SeedTotal; +- int32_t SeedTotalPreScaling; +- uint8_t AddrCmdPrelaunch = 0; /* TODO: Fetch the correct value from RC2[0] */ ++ if (nibble == 0) { ++ /* Pass 2 */ ++ /* From BKDG, Write Leveling Seed Value. */ ++ if (is_fam15h()) { ++ uint32_t RegisterDelay; ++ int32_t SeedTotal[MAX_BYTE_LANES]; ++ int32_t SeedTotalPreScaling[MAX_BYTE_LANES]; ++ uint32_t WrDqDqsEarly; ++ uint8_t AddrCmdPrelaunch = 0; /* TODO: Fetch the correct value from RC2[0] */ + +- for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) { + if (pDCTData->Status[DCT_STATUS_REGISTERED]) { + if (AddrCmdPrelaunch) + RegisterDelay = 0x30; +@@ -1150,84 +1180,133 @@ void procConfig(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, ui + } else { + RegisterDelay = 0; + } ++ + /* Retrieve WrDqDqsEarly */ +- AmdMemPCIReadBits(MAKE_SBDFO(0,0,24+(pDCTData->NodeId), FUN_DCT, 0xa8), 25, 24, &Value); ++ dword = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0xa8); ++ WrDqDqsEarly = (dword >> 24) & 0x3; + +- /* Calculate adjusted seed values */ +- SeedTotal = (pDCTData->WLFineDelayPrevPass[MAX_BYTE_LANES*dimm+ByteLane] & 0x1f) | +- ((pDCTData->WLGrossDelayPrevPass[MAX_BYTE_LANES*dimm+ByteLane] & 0x1f) << 5); +- SeedTotalPreScaling = (SeedTotal - RegisterDelay - (0x20 * Value)); +- SeedTotal = (int32_t) (RegisterDelay + ((((int64_t) SeedTotalPreScaling) * +- fam15h_freq_tab[MemClkFreq] * 100) / (fam15h_freq_tab[pDCTData->WLPrevMemclkFreq] * 100))); ++ /* FIXME ++ * Ignore WrDqDqsEarly for now to work around training issues ++ */ ++ WrDqDqsEarly = 0; + +- if (SeedTotal >= 0) { +- Seed_Gross = SeedTotal / 32; +- Seed_Fine = SeedTotal % 32; +- } else { +- Seed_Gross = (SeedTotal / 32) - 1; +- Seed_Fine = (SeedTotal % 32) + 32; ++ /* Generate new seed values */ ++ for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) { ++ /* Calculate adjusted seed values */ ++ SeedTotal[ByteLane] = (pDCTData->WLFineDelayPrevPass[MAX_BYTE_LANES*dimm+ByteLane] & 0x1f) | ++ ((pDCTData->WLGrossDelayPrevPass[MAX_BYTE_LANES*dimm+ByteLane] & 0x1f) << 5); ++ SeedTotalPreScaling[ByteLane] = (SeedTotal[ByteLane] - RegisterDelay - (0x20 * WrDqDqsEarly)); ++ SeedTotal[ByteLane] = (int32_t) (RegisterDelay + ((((int64_t) SeedTotalPreScaling[ByteLane]) * ++ fam15h_freq_tab[MemClkFreq] * 100) / (fam15h_freq_tab[pDCTData->WLPrevMemclkFreq] * 100))); + } + +- if (Seed_Gross == 0) +- Seed_PreGross = 0; +- else if (Seed_Gross & 0x1) +- Seed_PreGross = 1; +- else +- Seed_PreGross = 2; ++ /* Generate register values from seeds */ ++ for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) { ++ printk(BIOS_SPEW, "\tLane %02x scaled delay: %04x\n", ByteLane, SeedTotal[ByteLane]); + +- /* Save seed values for later use */ +- pDCTData->WLSeedGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Gross; +- pDCTData->WLSeedFineDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Fine; +- pDCTData->WLSeedPreGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_PreGross; ++ if (SeedTotal[ByteLane] >= 0) { ++ Seed_Gross = SeedTotal[ByteLane] / 32; ++ Seed_Fine = SeedTotal[ByteLane] % 32; ++ } else { ++ Seed_Gross = (SeedTotal[ByteLane] / 32) - 1; ++ Seed_Fine = (SeedTotal[ByteLane] % 32) + 32; ++ } + +- pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_PreGross; +- pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Fine; ++ if (Seed_Gross == 0) ++ Seed_PreGross = 0; ++ else if (Seed_Gross & 0x1) ++ Seed_PreGross = 1; ++ else ++ Seed_PreGross = 2; + +- printk(BIOS_SPEW, "\tLane %02x new seed: %04x\n", ByteLane, ((Seed_Gross & 0x1f) << 5) | (Seed_Fine & 0x1f)); +- } +- } else { +- uint32_t RegisterDelay; +- uint32_t SeedTotalPreScaling; +- uint32_t SeedTotal; +- uint8_t AddrCmdPrelaunch = 0; /* TODO: Fetch the correct value from RC2[0] */ +- for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) +- { +- if (pDCTData->Status[DCT_STATUS_REGISTERED]) { +- if (AddrCmdPrelaunch == 0) +- RegisterDelay = 0x20; ++ /* The BKDG-recommended algorithm causes problems with registered DIMMs on some systems ++ * due to the long register delays causing premature total delay wrap-around. ++ * Attempt to work around this... ++ */ ++ Seed_PreGross = Seed_Gross; ++ ++ /* Save seed values for later use */ ++ pDCTData->WLSeedGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Gross; ++ pDCTData->WLSeedFineDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Fine; ++ pDCTData->WLSeedPreGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_PreGross; ++ ++ pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_PreGross; ++ pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Fine; ++ ++ printk(BIOS_SPEW, "\tLane %02x new seed: %04x\n", ByteLane, ((pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] & 0x1f) << 5) | (pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] & 0x1f)); ++ } ++ } else { ++ uint32_t RegisterDelay; ++ uint32_t SeedTotalPreScaling; ++ uint32_t SeedTotal; ++ uint8_t AddrCmdPrelaunch = 0; /* TODO: Fetch the correct value from RC2[0] */ ++ for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) ++ { ++ if (pDCTData->Status[DCT_STATUS_REGISTERED]) { ++ if (AddrCmdPrelaunch == 0) ++ RegisterDelay = 0x20; ++ else ++ RegisterDelay = 0x30; ++ } else { ++ RegisterDelay = 0; ++ } ++ SeedTotalPreScaling = ((pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] & 0x1f) | ++ (pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] << 5)) - RegisterDelay; ++ /* SeedTotalPreScaling = (the total delay value in F2x[1, 0]9C_x[4A:30] from pass 1 of write levelization ++ training) - RegisterDelay. */ ++ SeedTotal = (uint16_t) ((((uint64_t) SeedTotalPreScaling) * ++ fam10h_freq_tab[MemClkFreq] * 100) / (fam10h_freq_tab[3] * 100)); ++ Seed_Gross = SeedTotal / 32; ++ Seed_Fine = SeedTotal & 0x1f; ++ if (Seed_Gross == 0) ++ Seed_Gross = 0; ++ else if (Seed_Gross & 0x1) ++ Seed_Gross = 1; + else +- RegisterDelay = 0x30; +- } else { +- RegisterDelay = 0; ++ Seed_Gross = 2; ++ ++ /* The BKDG-recommended algorithm causes problems with registered DIMMs on some systems ++ * due to the long register delays causing premature total delay wrap-around. ++ * Attempt to work around this... ++ */ ++ SeedTotal = ((Seed_Gross & 0x1f) << 5) | (Seed_Fine & 0x1f); ++ SeedTotal += RegisterDelay; ++ Seed_Gross = SeedTotal / 32; ++ Seed_Fine = SeedTotal & 0x1f; ++ ++ pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Gross; ++ pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Fine; ++ ++ printk(BIOS_SPEW, "\tLane %02x new seed: %04x\n", ByteLane, ((pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] & 0x1f) << 5) | (pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] & 0x1f)); + } +- SeedTotalPreScaling = ((pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] & 0x1f) | +- (pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] << 5)) - RegisterDelay; +- /* SeedTotalPreScaling = (the total delay value in F2x[1, 0]9C_x[4A:30] from pass 1 of write levelization +- training) - RegisterDelay. */ +- SeedTotal = (uint16_t) ((((uint64_t) SeedTotalPreScaling) * +- fam10h_freq_tab[MemClkFreq] * 100) / (fam10h_freq_tab[3] * 100)); +- Seed_Gross = SeedTotal / 32; +- Seed_Fine = SeedTotal & 0x1f; +- if (Seed_Gross == 0) +- Seed_Gross = 0; +- else if (Seed_Gross & 0x1) +- Seed_Gross = 1; +- else +- Seed_Gross = 2; ++ } + +- /* The BKDG-recommended algorithm causes problems with registered DIMMs on some systems +- * due to the long register delays causing premature total delay wrap-around. +- * Attempt to work around this... +- */ +- SeedTotal = ((Seed_Gross & 0x1f) << 5) | (Seed_Fine & 0x1f); +- SeedTotal += RegisterDelay; +- Seed_Gross = SeedTotal / 32; +- Seed_Fine = SeedTotal & 0x1f; ++ /* Save initial seeds for upper nibble pass */ ++ for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) { ++ pDCTData->WLSeedPreGrossPrevNibble[MAX_BYTE_LANES*dimm+ByteLane] = pDCTData->WLSeedPreGrossDelay[MAX_BYTE_LANES*dimm+ByteLane]; ++ pDCTData->WLSeedGrossPrevNibble[MAX_BYTE_LANES*dimm+ByteLane] = pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane]; ++ pDCTData->WLSeedFinePrevNibble[MAX_BYTE_LANES*dimm+ByteLane] = pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane]; ++ } ++ } else { ++ /* Restore seed values from lower nibble pass */ ++ if (is_fam15h()) { ++ for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) { ++ pDCTData->WLSeedGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = pDCTData->WLSeedGrossPrevNibble[MAX_BYTE_LANES*dimm+ByteLane]; ++ pDCTData->WLSeedFineDelay[MAX_BYTE_LANES*dimm+ByteLane] = pDCTData->WLSeedFinePrevNibble[MAX_BYTE_LANES*dimm+ByteLane]; ++ pDCTData->WLSeedPreGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = pDCTData->WLSeedPreGrossPrevNibble[MAX_BYTE_LANES*dimm+ByteLane]; + +- pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Gross; +- pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Fine; ++ pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = pDCTData->WLSeedPreGrossPrevNibble[MAX_BYTE_LANES*dimm+ByteLane]; ++ pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] = pDCTData->WLSeedFinePrevNibble[MAX_BYTE_LANES*dimm+ByteLane]; + +- printk(BIOS_SPEW, "\tLane %02x new seed: %04x\n", ByteLane, ((Seed_Gross & 0x1f) << 5) | (Seed_Fine & 0x1f)); ++ printk(BIOS_SPEW, "\tLane %02x new seed: %04x\n", ByteLane, ((pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] & 0x1f) << 5) | (pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] & 0x1f)); ++ } ++ } else { ++ for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) { ++ pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = pDCTData->WLSeedGrossPrevNibble[MAX_BYTE_LANES*dimm+ByteLane]; ++ pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] = pDCTData->WLSeedFinePrevNibble[MAX_BYTE_LANES*dimm+ByteLane]; ++ ++ printk(BIOS_SPEW, "\tLane %02x new seed: %04x\n", ByteLane, ((pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] & 0x1f) << 5) | (pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] & 0x1f)); ++ } + } + } + } +@@ -1358,7 +1437,7 @@ void setWLByteDelay(struct DCTStatStruc *pDCTstat, uint8_t dct, u8 ByteLane, u8 + } + + /*----------------------------------------------------------------------------- +- * void getWLByteDelay(struct DCTStatStruc *pDCTstat, uint8_t dct, u8 ByteLane, u8 Dimm) ++ * void getWLByteDelay(struct DCTStatStruc *pDCTstat, uint8_t dct, u8 ByteLane, u8 Dimm, u8 Nibble) + * + * Description: + * This function reads the write levelization byte delay from the Phase +@@ -1376,7 +1455,7 @@ void setWLByteDelay(struct DCTStatStruc *pDCTstat, uint8_t dct, u8 ByteLane, u8 + * + *----------------------------------------------------------------------------- + */ +-void getWLByteDelay(struct DCTStatStruc *pDCTstat, uint8_t dct, u8 ByteLane, u8 dimm, uint8_t pass) ++void getWLByteDelay(struct DCTStatStruc *pDCTstat, uint8_t dct, u8 ByteLane, u8 dimm, uint8_t pass, uint8_t nibble) + { + sDCTStruct *pDCTData = pDCTstat->C_DCTPtr[dct]; + u8 fineStartLoc, fineEndLoc, grossStartLoc, grossEndLoc, tempB, tempB1, index; +@@ -1427,7 +1506,16 @@ void getWLByteDelay(struct DCTStatStruc *pDCTstat, uint8_t dct, u8 ByteLane, u8 + fine = 0; + } + } +- pDCTData->WLFineDelay[index+ByteLane] = (u8)fine; +- pDCTData->WLGrossDelay[index+ByteLane] = (u8)gross; +- printk(BIOS_SPEW, "\tLane %02x final adjusted value: %04x\n", ByteLane, ((gross & 0x1f) << 5) | (fine & 0x1f)); ++ if (nibble == 0) { ++ pDCTData->WLFineDelay[index+ByteLane] = (uint8_t)fine; ++ pDCTData->WLGrossDelay[index+ByteLane] = (uint8_t)gross; ++ } else { ++ uint32_t WLTotalDelay = ((pDCTData->WLGrossDelay[index+ByteLane] & 0x1f) << 5) | (pDCTData->WLFineDelay[index+ByteLane] & 0x1f); ++ WLTotalDelay += ((gross & 0x1f) << 5) | (fine & 0x1f); ++ WLTotalDelay /= 2; ++ pDCTData->WLFineDelay[index+ByteLane] = (uint8_t)(WLTotalDelay & 0x1f); ++ pDCTData->WLGrossDelay[index+ByteLane] = (uint8_t)((WLTotalDelay >> 5) & 0x1f); ++ } ++ ++ printk(BIOS_SPEW, "\tLane %02x adjusted value: %04x\n", ByteLane, ((pDCTData->WLGrossDelay[index+ByteLane] & 0x1f) << 5) | (pDCTData->WLFineDelay[index+ByteLane] & 0x1f)); + } +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mwlc_d.h b/src/northbridge/amd/amdmct/mct_ddr3/mwlc_d.h +index 12e7c4a..3337c14 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mwlc_d.h ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mwlc_d.h +@@ -119,16 +119,21 @@ typedef struct _sDCTStruct + u8 DctTrain; /* Current DCT being trained */ + u8 CurrDct; /* Current DCT number (0 or 1) */ + u8 DctCSPresent; /* Current DCT CS mapping */ ++ uint8_t WrDqsGrossDlyBaseOffset; + int32_t WLSeedGrossDelay[MAX_BYTE_LANES*MAX_LDIMMS]; /* Write Levelization Seed Gross Delay */ + /* per byte Lane Per Logical DIMM*/ + int32_t WLSeedFineDelay[MAX_BYTE_LANES*MAX_LDIMMS]; /* Write Levelization Seed Fine Delay */ + /* per byte Lane Per Logical DIMM*/ + int32_t WLSeedPreGrossDelay[MAX_BYTE_LANES*MAX_LDIMMS]; /* Write Levelization Seed Pre-Gross Delay */ + /* per byte Lane Per Logical DIMM*/ +- u8 WLGrossDelay[MAX_BYTE_LANES*MAX_LDIMMS]; /* Write Levelization Gross Delay */ +- /* per byte Lane Per Logical DIMM*/ +- u8 WLFineDelay[MAX_BYTE_LANES*MAX_LDIMMS]; /* Write Levelization Fine Delay */ +- /* per byte Lane Per Logical DIMM*/ ++ uint8_t WLSeedPreGrossPrevNibble[MAX_BYTE_LANES*MAX_LDIMMS]; ++ uint8_t WLSeedGrossPrevNibble[MAX_BYTE_LANES*MAX_LDIMMS]; ++ uint8_t WLSeedFinePrevNibble[MAX_BYTE_LANES*MAX_LDIMMS]; ++ /* per byte Lane Per Logical DIMM*/ ++ u8 WLGrossDelay[MAX_BYTE_LANES*MAX_LDIMMS]; /* Write Levelization Gross Delay */ ++ /* per byte Lane Per Logical DIMM*/ ++ u8 WLFineDelay[MAX_BYTE_LANES*MAX_LDIMMS]; /* Write Levelization Fine Delay */ ++ /* per byte Lane Per Logical DIMM*/ + u8 WLGrossDelayFirstPass[MAX_BYTE_LANES*MAX_LDIMMS]; /* First-Pass Write Levelization Gross Delay */ + /* per byte Lane Per Logical DIMM*/ + u8 WLFineDelayFirstPass[MAX_BYTE_LANES*MAX_LDIMMS]; /* First-Pass Write Levelization Fine Delay */ +-- +1.9.1 + |