diff options
Diffstat (limited to 'resources/libreboot/patch/kgpe-d16/0096-amd-amdmct-mct_ddr3-Fix-poor-performance-on-Family-1.patch')
-rw-r--r-- | resources/libreboot/patch/kgpe-d16/0096-amd-amdmct-mct_ddr3-Fix-poor-performance-on-Family-1.patch | 1089 |
1 files changed, 1089 insertions, 0 deletions
diff --git a/resources/libreboot/patch/kgpe-d16/0096-amd-amdmct-mct_ddr3-Fix-poor-performance-on-Family-1.patch b/resources/libreboot/patch/kgpe-d16/0096-amd-amdmct-mct_ddr3-Fix-poor-performance-on-Family-1.patch new file mode 100644 index 00000000..06881504 --- /dev/null +++ b/resources/libreboot/patch/kgpe-d16/0096-amd-amdmct-mct_ddr3-Fix-poor-performance-on-Family-1.patch @@ -0,0 +1,1089 @@ +From 144073db29e770d85d01cbd6b093793aa951862f Mon Sep 17 00:00:00 2001 +From: Timothy Pearson <tpearson@raptorengineeringinc.com> +Date: Fri, 7 Aug 2015 19:05:29 -0500 +Subject: [PATCH 096/139] amd/amdmct/mct_ddr3: Fix poor performance on Family + 15h CPUs + +Change-Id: Ib6bc197e43e40ba2b923b1eb1229bacafc8be360 +Signed-off-by: Timothy Pearson <tpearson@raptorengineeringinc.com> +--- + src/northbridge/amd/amdmct/mct_ddr3/mct_d.c | 370 +++++++++++++++++++++---- + src/northbridge/amd/amdmct/mct_ddr3/mct_d.h | 1 + + src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c | 65 ++++- + src/northbridge/amd/amdmct/mct_ddr3/mctproc.c | 49 +++- + src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c | 195 ++++++++++++- + src/northbridge/amd/amdmct/mct_ddr3/mctwl.c | 4 + + 6 files changed, 604 insertions(+), 80 deletions(-) + +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c +index 1167976..2ca65ca 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c +@@ -36,6 +36,8 @@ + * supported. + */ + ++// #define DEBUG_DIMM_SPD 1 ++ + static u8 ReconfigureDIMMspare_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstatA); + static void DQSTiming_D(struct MCTStatStruc *pMCTstat, +@@ -172,7 +174,8 @@ static u32 mct_MR1Odt_RDimm(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct, u32 MrsChipSel); + static u32 mct_DramTermDyn_RDimm(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dimm); +-static u32 mct_SetDramConfigMisc2(struct DCTStatStruc *pDCTstat, u8 dct, u32 misc2); ++static u32 mct_SetDramConfigMisc2(struct DCTStatStruc *pDCTstat, ++ uint8_t dct, uint32_t misc2, uint32_t DramControl); + static void mct_BeforeDQSTrainSamp(struct DCTStatStruc *pDCTstat); + static void mct_WriteLevelization_HW(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstatA, uint8_t Pass); +@@ -1360,6 +1363,8 @@ static uint8_t fam15h_slow_access_mode(struct DCTStatStruc *pDCTstat, uint8_t dc + static void set_2t_configuration(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct) + { ++ printk(BIOS_DEBUG, "%s: Start\n", __func__); ++ + uint32_t dev; + uint32_t reg; + uint32_t dword; +@@ -1382,6 +1387,8 @@ static void set_2t_configuration(struct MCTStatStruc *pMCTstat, + else + dword &= ~(0x1 << 20); /* Clear 2T CMD mode */ + Set_NB32_DCT(dev, dct, reg, dword); ++ ++ printk(BIOS_DEBUG, "%s: Done\n", __func__); + } + + static void precise_ndelay_fam15(struct MCTStatStruc *pMCTstat, uint32_t nanoseconds) { +@@ -2002,6 +2009,7 @@ static void fam15EnableTrainingMode(struct MCTStatStruc *pMCTstat, + /* Disable training mode */ + uint8_t lane; + uint8_t dimm; ++ uint16_t sword; + uint8_t receiver; + uint8_t max_lane; + uint8_t ecc_enabled; +@@ -2016,21 +2024,37 @@ static void fam15EnableTrainingMode(struct MCTStatStruc *pMCTstat, + uint16_t twrwrdd; + uint16_t cdd_twrwrdd; + uint16_t twrrd; ++ uint16_t cdd_twrrd; ++ uint16_t cdd_trwtto; + uint16_t trwtto; + uint8_t first_dimm; + uint16_t delay; + uint16_t delay2; ++ uint8_t min_value; ++ uint8_t write_early; + uint8_t read_odt_delay; + uint8_t write_odt_delay; ++ uint8_t buffer_data_delay; ++ int16_t latency_difference; + uint16_t difference; + uint16_t current_total_delay_1[MAX_BYTE_LANES]; + uint16_t current_total_delay_2[MAX_BYTE_LANES]; ++ uint8_t ddr_voltage_index; ++ uint8_t max_dimms_installable; + + /* FIXME + * This should be platform configurable + */ + uint8_t dimm_event_l_pin_support = 0; + ++ if (pDCTstat->DIMMValidDCT[dct] == 0) ++ ddr_voltage_index = 1; ++ else ++ ddr_voltage_index = dct_ddr_voltage_index(pDCTstat, dct); ++ ++ ddr_voltage_index = dct_ddr_voltage_index(pDCTstat, dct); ++ max_dimms_installable = mctGet_NVbits(NV_MAX_DIMMS_PER_CH); ++ + ecc_enabled = !!(pMCTstat->GStatus & 1 << GSB_ECCDIMMs); + if (ecc_enabled) + max_lane = 9; +@@ -2064,6 +2088,24 @@ static void fam15EnableTrainingMode(struct MCTStatStruc *pMCTstat, + else + write_odt_delay = 0; + ++ dword = (Get_NB32_DCT(dev, dct, 0xa8) >> 24) & 0x3; ++ write_early = dword / 2; ++ ++ latency_difference = Get_NB32_DCT(dev, dct, 0x200) & 0x1f; ++ dword = Get_NB32_DCT(dev, dct, 0x20c) & 0x1f; ++ latency_difference -= dword; ++ ++ if (pDCTstat->Status & (1 << SB_LoadReduced)) { ++ /* LRDIMM */ ++ ++ /* TODO ++ * Implement LRDIMM support ++ * See Fam15h BKDG Rev. 3.14 section 2.10.5.5 ++ */ ++ } else { ++ buffer_data_delay = 0; ++ } ++ + /* TODO: + * Adjust trdrdsddc if four-rank DIMMs are installed per + * section 2.10.5.5.1 of the Family 15h BKDG. +@@ -2099,7 +2141,7 @@ static void fam15EnableTrainingMode(struct MCTStatStruc *pMCTstat, + } + + /* Convert the difference to MEMCLKs */ +- cdd_trdrddd = (((cdd_trdrddd >> 5) & 0x1f) + 1) / 2; ++ cdd_trdrddd = (((cdd_trdrddd + (1 << 6) - 1) >> 6) & 0xf); + + /* Calculate Trdrddd */ + delay = (read_odt_delay + 3) * 2; +@@ -2145,7 +2187,7 @@ static void fam15EnableTrainingMode(struct MCTStatStruc *pMCTstat, + } + + /* Convert the difference to MEMCLKs */ +- cdd_twrwrdd = (((cdd_twrwrdd >> 5) & 0x1f) + 1) / 2; ++ cdd_twrwrdd = (((cdd_twrwrdd + (1 << 6) - 1) >> 6) & 0xf); + + /* Calculate Twrwrdd */ + delay = (write_odt_delay + 3) * 2; +@@ -2164,6 +2206,107 @@ static void fam15EnableTrainingMode(struct MCTStatStruc *pMCTstat, + dword &= ~(0x1 << 18); /* DisAutoRefresh = 0 */ + Set_NB32_DCT(dev, dct, 0x8c, dword); /* DRAM Timing High */ + ++ /* Configure power saving options */ ++ dword = Get_NB32_DCT(dev, dct, 0xa8); /* Dram Miscellaneous 2 */ ++ dword |= (0x1 << 22); /* PrtlChPDEnhEn = 0x1 */ ++ dword |= (0x1 << 21); /* AggrPDEn = 0x1 */ ++ Set_NB32_DCT(dev, dct, 0xa8, dword); /* Dram Miscellaneous 2 */ ++ ++ /* Configure partial power down delay */ ++ dword = Get_NB32(dev, 0x244); /* DRAM Controller Miscellaneous 3 */ ++ dword &= ~0xf; /* PrtlChPDDynDly = 0x2 */ ++ dword |= 0x2; ++ Set_NB32(dev, 0x244, dword); /* DRAM Controller Miscellaneous 3 */ ++ ++ /* Configure power save delays */ ++ delay = 0xa; ++ delay2 = 0x3; ++ ++ /* Family 15h BKDG Table 214 */ ++ if ((pDCTstat->Status & (1 << SB_Registered)) ++ || (pDCTstat->Status & (1 << SB_LoadReduced))) { ++ if (memclk_index <= 0x6) { ++ if (ddr_voltage_index < 0x4) ++ /* 1.5 or 1.35V */ ++ delay2 = 0x3; ++ else ++ /* 1.25V */ ++ delay2 = 0x4; ++ } ++ else if ((memclk_index == 0xa) ++ || (memclk_index == 0xe)) ++ delay2 = 0x4; ++ else if (memclk_index == 0x12) ++ delay2 = 0x5; ++ else if (memclk_index == 0x16) ++ delay2 = 0x6; ++ } else { ++ if (memclk_index <= 0x6) ++ delay2 = 0x3; ++ else if ((memclk_index == 0xa) ++ || (memclk_index == 0xe)) ++ delay2 = 0x4; ++ else if (memclk_index == 0x12) ++ delay2 = 0x5; ++ else if (memclk_index == 0x16) ++ delay2 = 0x6; ++ } ++ ++ /* Family 15h BKDG Table 215 */ ++ if (memclk_index <= 0x6) ++ delay = 0xa; ++ else if (memclk_index == 0xa) ++ delay = 0xd; ++ else if (memclk_index == 0xe) ++ delay = 0x10; ++ else if (memclk_index == 0x12) ++ delay = 0x14; ++ else if (memclk_index == 0x16) ++ delay = 0x17; ++ ++ dword = Get_NB32_DCT(dev, dct, 0x248); /* Dram Power Management 0 */ ++ dword &= ~(0x3f << 24); /* AggrPDDelay = 0x0 */ ++ dword &= ~(0x3f << 16); /* PchgPDEnDelay = 0x1 */ ++ dword |= (0x1 << 16); ++ dword &= ~(0x1f << 8); /* Txpdll = delay */ ++ dword |= ((delay & 0x1f) << 8); ++ dword &= ~0xf; /* Txp = delay2 */ ++ dword |= delay2 & 0xf; ++ Set_NB32_DCT(dev, dct, 0x248, dword); /* Dram Power Management 0 */ ++ ++ /* Family 15h BKDG Table 216 */ ++ if (memclk_index <= 0x6) { ++ delay = 0x5; ++ delay2 = 0x3; ++ } ++ else if (memclk_index == 0xa) { ++ delay = 0x6; ++ delay2 = 0x3; ++ } ++ else if (memclk_index == 0xe) { ++ delay = 0x7; ++ delay2 = 0x4; ++ } ++ else if (memclk_index == 0x12) { ++ delay = 0x8; ++ delay2 = 0x4; ++ } ++ else if (memclk_index == 0x16) { ++ delay = 0xa; ++ delay2 = 0x5; ++ } ++ ++ dword = Get_NB32_DCT(dev, dct, 0x24c); /* Dram Power Management 1 */ ++ dword &= ~(0x3f << 24); /* Tcksrx = delay */ ++ dword |= ((delay & 0x3f) << 24); ++ dword &= ~(0x3f << 16); /* Tcksre = delay */ ++ dword |= ((delay & 0x3f) << 16); ++ dword &= ~(0x3f << 8); /* Tckesr = delay2 + 1 */ ++ dword |= (((delay2 + 1) & 0x3f) << 8); ++ dword &= ~0xf; /* Tpd = delay2 */ ++ dword |= delay2 & 0xf; ++ Set_NB32_DCT(dev, dct, 0x24c, dword); /* Dram Power Management 1 */ ++ + dword = Get_NB32_DCT(dev, dct, 0x94); /* DRAM Configuration High */ + dword |= (0xf << 24); /* DcqBypassMax = 0xf */ + dword |= (0x1 << 22); /* BankSwizzleMode = 1 */ +@@ -2216,15 +2359,98 @@ static void fam15EnableTrainingMode(struct MCTStatStruc *pMCTstat, + } + } + +- /* TODO +- * Calculate Twrrd per section 2.10.5.5.3 of the Family 15h BKDG +- */ +- twrrd = 0xb; ++ /* Calculate the Critical Delay Difference for Twrrd */ ++ cdd_twrrd = 0; ++ for (receiver = 0; receiver < 8; receiver += 2) { ++ dimm = (receiver >> 1); + +- /* TODO +- * Calculate TrwtTO per section 2.10.5.5.4 of the Family 15h BKDG +- */ +- trwtto = 0x16; ++ if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, dct, receiver)) ++ continue; ++ ++ read_dqs_write_timing_control_registers(current_total_delay_1, dev, dct, dimm, index_reg); ++ read_dqs_receiver_enable_control_registers(current_total_delay_2, dev, dct, dimm, index_reg); ++ ++ for (lane = 0; lane < max_lane; lane++) { ++ if (current_total_delay_1[lane] > current_total_delay_2[lane]) ++ difference = current_total_delay_1[lane] - current_total_delay_2[lane]; ++ else ++ difference = current_total_delay_2[lane] - current_total_delay_1[lane]; ++ ++ if (difference > cdd_twrrd) ++ cdd_twrrd = difference; ++ } ++ } ++ ++ /* Convert the difference to MEMCLKs */ ++ cdd_twrrd = (((cdd_twrrd + (1 << 6) - 1) >> 6) & 0xf); ++ ++ /* Fam15h BKDG section 2.10.5.5.3 */ ++ if (pDCTstat->Status & (1 << SB_LoadReduced)) { ++ /* LRDIMM */ ++ ++ /* TODO ++ * Implement LRDIMM support ++ * See Fam15h BKDG Rev. 3.14 section 2.10.5.5 ++ */ ++ twrrd = 0xb; ++ } else { ++ sword = (((int16_t)cdd_twrrd + 1 - ((int16_t)write_early * 2)) + 1) / 2; ++ if (sword < 0) ++ sword = 0; ++ if (((uint16_t)sword) > write_odt_delay) ++ dword = sword; ++ else ++ dword = write_odt_delay; ++ dword += 3; ++ if (latency_difference < dword) { ++ dword -= latency_difference; ++ if (dword < 1) ++ twrrd = 1; ++ else ++ twrrd = dword; ++ } else { ++ twrrd = 1; ++ } ++ } ++ ++ /* Calculate the Critical Delay Difference for TrwtTO */ ++ cdd_trwtto = 0; ++ for (receiver = 0; receiver < 8; receiver += 2) { ++ dimm = (receiver >> 1); ++ ++ if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, dct, receiver)) ++ continue; ++ ++ read_dqs_receiver_enable_control_registers(current_total_delay_1, dev, dct, dimm, index_reg); ++ read_dqs_write_timing_control_registers(current_total_delay_2, dev, dct, dimm, index_reg); ++ ++ for (lane = 0; lane < max_lane; lane++) { ++ if (current_total_delay_1[lane] > current_total_delay_2[lane]) ++ difference = current_total_delay_1[lane] - current_total_delay_2[lane]; ++ else ++ difference = current_total_delay_2[lane] - current_total_delay_1[lane]; ++ ++ if (difference > cdd_trwtto) ++ cdd_trwtto = difference; ++ } ++ } ++ ++ /* Convert the difference to MEMCLKs */ ++ cdd_trwtto = (((cdd_trwtto + (1 << 6) - 1) >> 6) & 0xf); ++ ++ /* Fam15h BKDG section 2.10.5.5.4 */ ++ if (max_dimms_installable == 1) ++ min_value = 0; ++ else ++ min_value = read_odt_delay + buffer_data_delay; ++ sword = (((int16_t)cdd_trwtto - 1 + ((int16_t)write_early * 2)) + 1) / 2; ++ sword += latency_difference + 3; ++ if (sword < 0) ++ sword = 0; ++ if (((uint16_t)sword) > min_value) ++ trwtto = (uint16_t)sword; ++ else ++ trwtto = min_value; + + dword = Get_NB32_DCT(dev, dct, 0xa4); /* DRAM Controller Temperature Throttle */ + dword &= ~(0x1 << 11); /* BwCapEn = 0 */ +@@ -2235,6 +2461,7 @@ static void fam15EnableTrainingMode(struct MCTStatStruc *pMCTstat, + dword = Get_NB32_DCT(dev, dct, 0x110); /* DRAM Controller Select Low */ + dword &= ~(0x1 << 2); /* DctSelIntLvEn = interleave_channels */ + dword |= (interleave_channels & 0x1) << 2; ++ dword |= (0x3 << 6); /* DctSelIntLvAddr = 0x3 */ + Set_NB32_DCT(dev, dct, 0x110, dword); /* DRAM Controller Select Low */ + + /* NOTE +@@ -2242,22 +2469,6 @@ static void fam15EnableTrainingMode(struct MCTStatStruc *pMCTstat, + * otherwise semi-random lockups will occur due to misconfigured scrubbing hardware! + */ + +- /* FIXME +- * The BKDG-recommended settings cause memory corruption on the ASUS KGPE-D16. +- * Investigate and fix... +- */ +-#if 0 +- /* Fam15h BKDG section 2.10.5.5.1 */ +- dword = Get_NB32_DCT(dev, dct, 0x218); /* DRAM Timing 5 */ +- dword &= ~(0xf << 24); /* TrdrdSdSc = 0x1 */ +- dword |= (0x1 << 24); +- dword &= ~(0xf << 16); /* TrdrdSdDc = trdrdsddc */ +- dword |= ((trdrdsddc & 0xf) << 16); +- dword &= ~(0xf); /* TrdrdDd = trdrddd */ +- dword |= (trdrddd & 0xf); +- Set_NB32_DCT(dev, dct, 0x218, dword); /* DRAM Timing 5 */ +-#endif +- + /* Fam15h BKDG section 2.10.5.5.2 */ + dword = Get_NB32_DCT(dev, dct, 0x214); /* DRAM Timing 4 */ + dword &= ~(0xf << 16); /* TwrwrSdSc = 0x1 */ +@@ -2270,8 +2481,14 @@ static void fam15EnableTrainingMode(struct MCTStatStruc *pMCTstat, + + /* Fam15h BKDG section 2.10.5.5.3 */ + dword = Get_NB32_DCT(dev, dct, 0x218); /* DRAM Timing 5 */ ++ dword &= ~(0xf << 24); /* TrdrdSdSc = 0x1 */ ++ dword |= (0x1 << 24); ++ dword &= ~(0xf << 16); /* TrdrdSdDc = trdrdsddc */ ++ dword |= ((trdrdsddc & 0xf) << 16); + dword &= ~(0xf << 8); /* Twrrd = twrrd */ + dword |= ((twrrd & 0xf) << 8); ++ dword &= ~(0xf); /* TrdrdDd = trdrddd */ ++ dword |= (trdrddd & 0xf); + Set_NB32_DCT(dev, dct, 0x218, dword); /* DRAM Timing 5 */ + + /* Fam15h BKDG section 2.10.5.5.4 */ +@@ -2282,12 +2499,6 @@ static void fam15EnableTrainingMode(struct MCTStatStruc *pMCTstat, + dword |= ((((dword >> 8) & 0x1f) + 1) << 16); + Set_NB32_DCT(dev, dct, 0x21c, dword); /* DRAM Timing 6 */ + +- /* Configure partial power down delay */ +- dword = Get_NB32(dev, 0x244); /* DRAM Controller Miscellaneous 3 */ +- dword &= ~0xf; /* PrtlChPDDynDly = 0x2 */ +- dword |= 0x2; +- Set_NB32(dev, 0x244, dword); /* DRAM Controller Miscellaneous 3 */ +- + /* Enable prefetchers */ + dword = Get_NB32(dev, 0x11c); /* Memory Controller Configuration High */ + dword &= ~(0x1 << 13); /* PrefIoDis = 0 */ +@@ -2376,6 +2587,8 @@ static void DQSTiming_D(struct MCTStatStruc *pMCTstat, + + mct_TrainDQSPos_D(pMCTstat, pDCTstatA); + ++ TrainMaxRdLatency_En_D(pMCTstat, pDCTstatA); ++ + if (is_fam15h()) + exit_training_mode_fam15(pMCTstat, pDCTstatA); + else +@@ -2953,6 +3166,13 @@ static void ClearDCT_D(struct MCTStatStruc *pMCTstat, + } + + while(reg < reg_end) { ++ if ((reg & 0xFF) == 0x84) { ++ if (is_fam15h()) { ++ val = Get_NB32_DCT(dev, dct, reg); ++ val &= ~(0x1 << 23); /* Clear PchgPDModeSel */ ++ val &= ~0x3; /* Clear BurstCtrl */ ++ } ++ } + if ((reg & 0xFF) == 0x90) { + if (pDCTstat->LogicalCPUID & AMD_DR_Dx) { + val = Get_NB32_DCT(dev, dct, reg); /* get DRAMConfigLow */ +@@ -3071,14 +3291,30 @@ static void SPD2ndTiming(struct MCTStatStruc *pMCTstat, + + /* Convert DRAM CycleTiming values and store into DCT structure */ + byte = pDCTstat->DIMMAutoSpeed; +- if (byte == 7) +- tCK16x = 20; +- else if (byte == 6) +- tCK16x = 24; +- else if (byte == 5) +- tCK16x = 30; +- else +- tCK16x = 40; ++ if (is_fam15h()) { ++ if (byte == 0x16) ++ tCK16x = 17; ++ else if (byte == 0x12) ++ tCK16x = 20; ++ else if (byte == 0xe) ++ tCK16x = 24; ++ else if (byte == 0xa) ++ tCK16x = 30; ++ else if (byte == 0x6) ++ tCK16x = 40; ++ else ++ tCK16x = 48; ++ } ++ else { ++ if (byte == 7) ++ tCK16x = 20; ++ else if (byte == 6) ++ tCK16x = 24; ++ else if (byte == 5) ++ tCK16x = 30; ++ else ++ tCK16x = 40; ++ } + + /* Notes: + 1. All secondary time values given in SPDs are in binary with units of ns. +@@ -3111,7 +3347,7 @@ static void SPD2ndTiming(struct MCTStatStruc *pMCTstat, + val = Max_TrpT; + pDCTstat->Trp = val; + +- /*Trrd*/ ++ /* Trrd */ + pDCTstat->DIMMTrrd = Trrd; + val = Trrd / tCK16x; + if (Trrd % tCK16x) { /* round up number of busclocks */ +@@ -3229,21 +3465,31 @@ static void SPD2ndTiming(struct MCTStatStruc *pMCTstat, + + dword = Get_NB32_DCT(dev, dct, 0x200); /* DRAM Timing 0 */ + dword &= ~(0x3f1f1f1f); +- dword |= ((pDCTstat->Tras + 0xf) & 0x3f) << 24; /* Tras */ +- dword |= ((pDCTstat->Trp + 0x5) & 0x1f) << 16; /* Trp */ +- dword |= ((pDCTstat->Trcd + 0x5) & 0x1f) << 8; /* Trcd */ ++ dword |= (pDCTstat->Tras & 0x3f) << 24; /* Tras */ ++ val = pDCTstat->Trp; ++ val = mct_AdjustSPDTimings(pMCTstat, pDCTstat, val); ++ dword |= (val & 0x1f) << 16; /* Trp */ ++ dword |= (pDCTstat->Trcd & 0x1f) << 8; /* Trcd */ + dword |= (pDCTstat->CASL & 0x1f); /* Tcl */ + Set_NB32_DCT(dev, dct, 0x200, dword); /* DRAM Timing 0 */ + + dword = Get_NB32_DCT(dev, dct, 0x204); /* DRAM Timing 1 */ + dword &= ~(0x0f3f0f3f); +- dword |= ((pDCTstat->Trtp + 0x4) & 0xf) << 24; /* Trtp */ +- if (pDCTstat->Tfaw != 0) +- dword |= ((((pDCTstat->Tfaw - 0x1) * 2) + 0x10) & 0x3f) << 16; /* FourActWindow */ +- dword |= ((pDCTstat->Trrd + 0x4) & 0xf) << 8; /* Trrd */ +- dword |= ((pDCTstat->Trc + 0xb) & 0x3f); /* Trc */ ++ dword |= (pDCTstat->Trtp & 0xf) << 24; /* Trtp */ ++ if (pDCTstat->Tfaw != 0) { ++ val = pDCTstat->Tfaw; ++ val = mct_AdjustSPDTimings(pMCTstat, pDCTstat, val); ++ if ((val > 0x5) && (val < 0x2b)) ++ dword |= (val & 0x3f) << 16; /* FourActWindow */ ++ } ++ dword |= (pDCTstat->Trrd & 0xf) << 8; /* Trrd */ ++ dword |= (pDCTstat->Trc & 0x3f); /* Trc */ + Set_NB32_DCT(dev, dct, 0x204, dword); /* DRAM Timing 1 */ + ++ /* Trfc0-Trfc3 */ ++ for (i=0; i<4; i++) ++ if (pDCTstat->Trfc[i] == 0x0) ++ pDCTstat->Trfc[i] = 0x4; + dword = Get_NB32_DCT(dev, dct, 0x208); /* DRAM Timing 2 */ + dword &= ~(0x07070707); + dword |= (pDCTstat->Trfc[3] & 0x7) << 24; /* Trfc3 */ +@@ -3254,14 +3500,14 @@ static void SPD2ndTiming(struct MCTStatStruc *pMCTstat, + + dword = Get_NB32_DCT(dev, dct, 0x20c); /* DRAM Timing 3 */ + dword &= ~(0x00000f00); +- dword |= ((pDCTstat->Twtr + 0x4) & 0xf) << 8; /* Twtr */ ++ dword |= (pDCTstat->Twtr & 0xf) << 8; /* Twtr */ + dword &= ~(0x0000001f); + dword |= (Tcwl & 0x1f); /* Tcwl */ + Set_NB32_DCT(dev, dct, 0x20c, dword); /* DRAM Timing 3 */ + + dword = Get_NB32_DCT(dev, dct, 0x22c); /* DRAM Timing 10 */ + dword &= ~(0x0000001f); +- dword |= ((pDCTstat->Twr + 0x4) & 0x1f); /* Twr */ ++ dword |= (pDCTstat->Twr & 0x1f); /* Twr */ + Set_NB32_DCT(dev, dct, 0x22c, dword); /* DRAM Timing 10 */ + + if (pDCTstat->Speed > mhz_to_memclk_config(mctGet_NVbits(NV_MIN_MEMCLK))) { +@@ -3857,6 +4103,8 @@ static u8 AutoConfig_D(struct MCTStatStruc *pMCTstat, + } + } + ++ DramConfigMisc2 = mct_SetDramConfigMisc2(pDCTstat, dct, DramConfigMisc2, DramControl); ++ + printk(BIOS_DEBUG, "AutoConfig_D: DramControl: %08x\n", DramControl); + printk(BIOS_DEBUG, "AutoConfig_D: DramTimingLo: %08x\n", DramTimingLo); + printk(BIOS_DEBUG, "AutoConfig_D: DramConfigMisc: %08x\n", DramConfigMisc); +@@ -3868,7 +4116,6 @@ static u8 AutoConfig_D(struct MCTStatStruc *pMCTstat, + Set_NB32_DCT(dev, dct, 0x78, DramControl); + Set_NB32_DCT(dev, dct, 0x88, DramTimingLo); + Set_NB32_DCT(dev, dct, 0xa0, DramConfigMisc); +- DramConfigMisc2 = mct_SetDramConfigMisc2(pDCTstat, dct, DramConfigMisc2); + Set_NB32_DCT(dev, dct, 0xa8, DramConfigMisc2); + Set_NB32_DCT(dev, dct, 0x90, DramConfigLo); + ProgDramMRSReg_D(pMCTstat, pDCTstat, dct); +@@ -5239,6 +5486,16 @@ static void mct_PhyController_Config(struct MCTStatStruc *pMCTstat, + u32 dev = pDCTstat->dev_dct; + + if (pDCTstat->LogicalCPUID & (AMD_DR_DAC2_OR_C3 | AMD_RB_C3 | AMD_FAM15_ALL)) { ++ if (is_fam15h()) { ++ /* Set F2x[1, 0]98_x0D0F0F13 DllDisEarlyU and DllDisEarlyL to save power */ ++ for (index = 0; index < 0x9; index++) { ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0013 | (index << 8)); ++ dword |= (0x1 << 1); /* DllDisEarlyU = 1 */ ++ dword |= 0x1; /* DllDisEarlyL = 1 */ ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0013 | (index << 8), dword); ++ } ++ } ++ + if (pDCTstat->Dimmx4Present == 0) { + /* Set bit7 RxDqsUDllPowerDown to register F2x[1, 0]98_x0D0F0F13 for + * additional power saving when x4 DIMMs are not present. +@@ -5283,8 +5540,9 @@ static void mct_FinalMCT_D(struct MCTStatStruc *pMCTstat, + mct_ExtMCTConfig_Dx(pDCTstat); + } else { + /* Family 15h CPUs */ +- val = 0x0ce00f00 | 0x1 << 29; /* FlushWrOnStpGnt */ +- val |= 0x10 << 2; /* MctWrLimit = 16 */ ++ val = 0x0ce00f00; /* FlushWrOnStpGnt = 0x0 */ ++ val |= 0x10 << 2; /* MctWrLimit = 0x10 */ ++ val |= 0x1; /* DctWrLimit = 0x1 */ + Set_NB32(pDCTstat->dev_dct, 0x11c, val); + + val = Get_NB32(pDCTstat->dev_dct, 0x1b0); +@@ -6524,8 +6782,8 @@ void ProgDramMRSReg_D(struct MCTStatStruc *pMCTstat, + + dword = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x84); + if (is_fam15h()) { +- dword |= DramMRS; + dword &= ~0x00800003; ++ dword |= DramMRS; + } else { + dword &= ~0x00fc2f8f; + dword |= DramMRS; +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h +index 486b16c..ec5658e 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h +@@ -988,6 +988,7 @@ void UMAMemTyping_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat + uint64_t mctGetLogicalCPUID(u32 Node); + u8 ECCInit_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA); + void TrainReceiverEn_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA, u8 Pass); ++void TrainMaxRdLatency_En_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA); + void mct_TrainDQSPos_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA); + void mctSetEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA); + void TrainMaxReadLatency_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA); +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c +index c70fa6d..c520515 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c +@@ -24,6 +24,9 @@ static void write_dqs_receiver_enable_control_registers(uint16_t* current_total_ + static void read_read_dqs_timing_control_registers(uint16_t* current_total_delay, + uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg); + ++static void dqsTrainMaxRdLatency_SW_Fam15(struct MCTStatStruc *pMCTstat, ++ struct DCTStatStruc *pDCTstat); ++ + static void CalcEccDQSPos_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u16 like, + u8 scale, u8 ChipSel); +@@ -218,6 +221,27 @@ void TrainReceiverEn_D(struct MCTStatStruc *pMCTstat, + } + } + ++void TrainMaxRdLatency_En_D(struct MCTStatStruc *pMCTstat, ++ struct DCTStatStruc *pDCTstatA) ++{ ++ uint8_t node; ++ struct DCTStatStruc *pDCTstat; ++ ++ for (node = 0; node < MAX_NODES_SUPPORTED; node++) { ++ pDCTstat = pDCTstatA + node; ++ ++ if (pDCTstat->DCTSysLimit) { ++ if (is_fam15h()) { ++ dqsTrainMaxRdLatency_SW_Fam15(pMCTstat, pDCTstat); ++ } else { ++ /* FIXME ++ * Implement Family 10h MaxRdLatency training ++ */ ++ } ++ } ++ } ++} ++ + static void SetEccDQSRdWrPos_D_Fam10(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 ChipSel) + { +@@ -898,7 +922,7 @@ static void TrainDQSRdWrPos_D_Fam10(struct MCTStatStruc *pMCTstat, + * Algorithm detailed in the Fam15h BKDG Rev. 3.14 section 2.10.5.8.5 + */ + static void Calc_SetMaxRdLatency_D_Fam15(struct MCTStatStruc *pMCTstat, +- struct DCTStatStruc *pDCTstat, uint8_t dct) ++ struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t calc_min) + { + uint8_t dimm; + uint8_t lane; +@@ -942,7 +966,8 @@ static void Calc_SetMaxRdLatency_D_Fam15(struct MCTStatStruc *pMCTstat, + p += (9 - dword); + + /* 2.10.5.8.5 (4) */ +- p += 5; ++ if (!calc_min) ++ p += 5; + + /* 2.10.5.8.5 (5) */ + dword = Get_NB32_DCT(dev, dct, 0xa8); +@@ -969,7 +994,8 @@ static void Calc_SetMaxRdLatency_D_Fam15(struct MCTStatStruc *pMCTstat, + p += (max_delay >> 5); + + /* 2.10.5.8.5 (8) */ +- p += 5; ++ if (!calc_min) ++ p += 5; + + /* 2.10.5.8.5 (9) */ + t += 800; +@@ -980,13 +1006,16 @@ static void Calc_SetMaxRdLatency_D_Fam15(struct MCTStatStruc *pMCTstat, + n = (((((uint64_t)p * 1000000000000ULL)/(((uint64_t)fam15h_freq_tab[mem_clk] * 1000000ULL) * 2)) + ((uint64_t)t)) * ((uint64_t)nb_clk * 1000)) / 1000000000ULL; + + /* 2.10.5.8.5 (11) */ +- n -= 1; ++ if (!calc_min) ++ n -= 1; + + /* 2.10.5.8.5 (12) */ +- dword = Get_NB32_DCT_NBPstate(dev, dct, nb_pstate, 0x210); +- dword &= ~(0x3ff << 22); +- dword |= (((n - 1) & 0x3ff) << 22); +- Set_NB32_DCT_NBPstate(dev, dct, nb_pstate, 0x210, dword); ++ if (!calc_min) { ++ dword = Get_NB32_DCT_NBPstate(dev, dct, nb_pstate, 0x210); ++ dword &= ~(0x3ff << 22); ++ dword |= (((n - 1) & 0x3ff) << 22); ++ Set_NB32_DCT_NBPstate(dev, dct, nb_pstate, 0x210, dword); ++ } + + /* Save result for later use */ + pDCTstat->CH_MaxRdLat[dct] = n - 1; +@@ -1107,6 +1136,9 @@ static void read_dram_dqs_training_pattern_fam15(struct MCTStatStruc *pMCTstat, + } else if (lane < 8) { + Set_NB32_DCT(dev, dct, 0x274, ~0x0); + Set_NB32_DCT(dev, dct, 0x278, ~(0xff << (lane * 8))); ++ } else if (lane == 0xff) { ++ Set_NB32_DCT(dev, dct, 0x274, ~0xffffffff); ++ Set_NB32_DCT(dev, dct, 0x278, ~0xffffffff); + } else { + Set_NB32_DCT(dev, dct, 0x274, ~0x0); + Set_NB32_DCT(dev, dct, 0x278, ~0x0); +@@ -1114,8 +1146,9 @@ static void read_dram_dqs_training_pattern_fam15(struct MCTStatStruc *pMCTstat, + + dword = Get_NB32_DCT(dev, dct, 0x27c); + dword &= ~(0xff); /* EccMask = 0 */ +- if ((lane != 8) || (pDCTstat->DimmECCPresent == 0)) +- dword |= 0xff; /* EccMask = 0xff */ ++ if (lane != 0xff) ++ if ((lane != 8) || (pDCTstat->DimmECCPresent == 0)) ++ dword |= 0xff; /* EccMask = 0xff */ + Set_NB32_DCT(dev, dct, 0x27c, dword); + + dword = Get_NB32_DCT(dev, dct, 0x270); +@@ -1184,6 +1217,9 @@ static void write_dram_dqs_training_pattern_fam15(struct MCTStatStruc *pMCTstat, + } else if (lane < 8) { + Set_NB32_DCT(dev, dct, 0x274, ~0x0); + Set_NB32_DCT(dev, dct, 0x278, ~(0xff << (lane * 8))); ++ } else if (lane == 0xff) { ++ Set_NB32_DCT(dev, dct, 0x274, ~0xffffffff); ++ Set_NB32_DCT(dev, dct, 0x278, ~0xffffffff); + } else { + Set_NB32_DCT(dev, dct, 0x274, ~0x0); + Set_NB32_DCT(dev, dct, 0x278, ~0x0); +@@ -1191,8 +1227,9 @@ static void write_dram_dqs_training_pattern_fam15(struct MCTStatStruc *pMCTstat, + + dword = Get_NB32_DCT(dev, dct, 0x27c); + dword &= ~(0xff); /* EccMask = 0 */ +- if ((lane != 8) || (pDCTstat->DimmECCPresent == 0)) +- dword |= 0xff; /* EccMask = 0xff */ ++ if (lane != 0xff) ++ if ((lane != 8) || (pDCTstat->DimmECCPresent == 0)) ++ dword |= 0xff; /* EccMask = 0xff */ + Set_NB32_DCT(dev, dct, 0x27c, dword); + + dword = Get_NB32_DCT(dev, dct, 0x270); +@@ -1278,7 +1315,7 @@ static uint8_t TrainDQSRdWrPos_D_Fam15(struct MCTStatStruc *pMCTstat, + uint32_t dev = pDCTstat->dev_dct; + + /* Calculate and program MaxRdLatency */ +- Calc_SetMaxRdLatency_D_Fam15(pMCTstat, pDCTstat, dct); ++ Calc_SetMaxRdLatency_D_Fam15(pMCTstat, pDCTstat, dct, 0); + + Errors = 0; + dual_rank = 0; +@@ -1636,7 +1673,7 @@ static void TrainDQSReceiverEnCyc_D_Fam15(struct MCTStatStruc *pMCTstat, + write_dqs_receiver_enable_control_registers(current_phy_phase_delay, dev, dct, dimm, index_reg); + + /* Calculate and program MaxRdLatency */ +- Calc_SetMaxRdLatency_D_Fam15(pMCTstat, pDCTstat, dct); ++ Calc_SetMaxRdLatency_D_Fam15(pMCTstat, pDCTstat, dct, 0); + + /* 2.10.5.8.3 (4 B) */ + dqs_results_array[current_phy_phase_delay[lane]] = TrainDQSRdWrPos_D_Fam15(pMCTstat, pDCTstat, dct, Receiver, Receiver + 2, lane, lane + 1); +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctproc.c b/src/northbridge/amd/amdmct/mct_ddr3/mctproc.c +index 738304e..3da28b3 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mctproc.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctproc.c +@@ -19,7 +19,8 @@ + */ + + /* mct_SetDramConfigMisc2_Cx & mct_SetDramConfigMisc2_Dx */ +-u32 mct_SetDramConfigMisc2(struct DCTStatStruc *pDCTstat, u8 dct, u32 misc2) ++u32 mct_SetDramConfigMisc2(struct DCTStatStruc *pDCTstat, ++ uint8_t dct, uint32_t misc2, uint32_t DramControl) + { + u32 val; + +@@ -28,17 +29,47 @@ u32 mct_SetDramConfigMisc2(struct DCTStatStruc *pDCTstat, u8 dct, u32 misc2) + if (pDCTstat->LogicalCPUID & AMD_FAM15_ALL) { + uint8_t cs_mux_45; + uint8_t cs_mux_67; ++ uint32_t f2x80; + +- /* BKDG v3.14 Table 200 / Table 201 */ +- if (MaxDimmsInstallable < 3) { +- cs_mux_45 = 1; +- cs_mux_67 = 1; +- } else { ++ misc2 &= ~(0x1 << 28); /* FastSelfRefEntryDis = 0x0 */ ++ if (MaxDimmsInstallable == 3) { ++ /* FIXME 3 DIMMS per channel unimplemented */ + cs_mux_45 = 0; ++ } else { ++ uint32_t f2x60 = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x60); ++ f2x80 = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x80); ++ if ((((f2x80 & 0xf) == 0x7) || ((f2x80 & 0xf) == 0x9)) ++ && ((f2x60 & 0x3) == 0x3)) ++ cs_mux_45 = 1; ++ else if ((((f2x80 & 0xa) == 0x7) || ((f2x80 & 0xb) == 0x9)) ++ && ((f2x60 & 0x3) > 0x1)) ++ cs_mux_45 = 1; ++ else ++ cs_mux_45 = 0; ++ } ++ ++ if (MaxDimmsInstallable == 1) { ++ cs_mux_67 = 0; ++ } else if (MaxDimmsInstallable == 2) { ++ uint32_t f2x64 = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x64); ++ f2x80 = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x80); ++ if (((((f2x80 >> 4) & 0xf) == 0x7) || (((f2x80 >> 4) & 0xf) == 0x9)) ++ && ((f2x64 & 0x3) == 0x3)) ++ cs_mux_67 = 1; ++ else if (((((f2x80 >> 4) & 0xa) == 0x7) || (((f2x80 >> 4) & 0xb) == 0x9)) ++ && ((f2x64 & 0x3) > 0x1)) ++ cs_mux_67 = 1; ++ else ++ cs_mux_67 = 0; ++ } else { ++ /* FIXME 3 DIMMS per channel unimplemented */ + cs_mux_67 = 0; + } +- misc2 |= (cs_mux_45 & 0x1) << 26; +- misc2 |= (cs_mux_67 & 0x1) << 27; ++ ++ misc2 &= ~(0x1 << 27); /* CsMux67 = cs_mux_67 */ ++ misc2 |= ((cs_mux_67 & 0x1) << 27); ++ misc2 &= ~(0x1 << 26); /* CsMux45 = cs_mux_45 */ ++ misc2 |= ((cs_mux_45 & 0x1) << 26); + } else if (pDCTstat->LogicalCPUID & (AMD_DR_Dx | AMD_DR_Cx)) { + if (pDCTstat->Status & (1 << SB_Registered)) { + misc2 |= 1 << SubMemclkRegDly; +@@ -50,8 +81,8 @@ u32 mct_SetDramConfigMisc2(struct DCTStatStruc *pDCTstat, u8 dct, u32 misc2) + + if (pDCTstat->LogicalCPUID & AMD_DR_Cx) + misc2 |= 1 << OdtSwizzle; +- val = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x78); + ++ val = DramControl; + val &= 7; + val = ((~val) & 0xff) + 1; + val += 6; +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c +index 707e6a9..3ede104 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c +@@ -1424,7 +1424,7 @@ static void dqsTrainRcvrEn_SW_Fam15(struct MCTStatStruc *pMCTstat, + } + + /* Calculate and program MaxRdLatency */ +- Calc_SetMaxRdLatency_D_Fam15(pMCTstat, pDCTstat, Channel); ++ Calc_SetMaxRdLatency_D_Fam15(pMCTstat, pDCTstat, Channel, 0); + + if(_DisableDramECC) { + mct_EnableDimmEccEn_D(pMCTstat, pDCTstat, _DisableDramECC); +@@ -1487,6 +1487,199 @@ static void dqsTrainRcvrEn_SW_Fam15(struct MCTStatStruc *pMCTstat, + printk(BIOS_DEBUG, "TrainRcvrEn: Done\n\n"); + } + ++static void write_max_read_latency_to_registers(struct MCTStatStruc *pMCTstat, ++ struct DCTStatStruc *pDCTstat, uint8_t dct, uint16_t latency) ++{ ++ uint32_t dword; ++ uint8_t nb_pstate; ++ ++ for (nb_pstate = 0; nb_pstate < 2; nb_pstate++) { ++ dword = Get_NB32_DCT_NBPstate(pDCTstat->dev_dct, dct, nb_pstate, 0x210); ++ dword &= ~(0x3ff << 22); ++ dword |= ((latency & 0x3ff) << 22); ++ Set_NB32_DCT_NBPstate(pDCTstat->dev_dct, dct, nb_pstate, 0x210, dword); ++ } ++} ++ ++/* DQS MaxRdLatency Training (Family 15h) ++ * Algorithm detailed in: ++ * The Fam15h BKDG Rev. 3.14 section 2.10.5.8.5.1 ++ * This algorithm runs at the highest supported MEMCLK. ++ */ ++static void dqsTrainMaxRdLatency_SW_Fam15(struct MCTStatStruc *pMCTstat, ++ struct DCTStatStruc *pDCTstat) ++{ ++ u8 Channel; ++ u8 Addl_Index = 0; ++ u8 Receiver; ++ u8 _DisableDramECC = 0, _Wrap32Dis = 0, _SSE2 = 0; ++ u32 Errors; ++ ++ u32 dev; ++ u32 index_reg; ++ u32 ch_start, ch_end; ++ u32 msr; ++ u32 cr4; ++ u32 lo, hi; ++ ++ uint32_t dword; ++ uint8_t dimm; ++ uint8_t lane; ++ uint8_t mem_clk; ++ uint32_t nb_clk; ++ uint8_t nb_pstate; ++ uint16_t current_total_delay[MAX_BYTE_LANES]; ++ uint16_t current_rdqs_total_delay[MAX_BYTE_LANES]; ++ uint8_t current_worst_case_total_delay_dimm; ++ uint16_t current_worst_case_total_delay_value; ++ ++ uint16_t fam15h_freq_tab[] = {0, 0, 0, 0, 333, 0, 400, 0, 0, 0, 533, 0, 0, 0, 667, 0, 0, 0, 800, 0, 0, 0, 933}; ++ ++ print_debug_dqs("\nTrainMaxRdLatency: Node", pDCTstat->Node_ID, 0); ++ ++ dev = pDCTstat->dev_dct; ++ index_reg = 0x98; ++ ch_start = 0; ++ ch_end = 2; ++ ++ cr4 = read_cr4(); ++ if(cr4 & ( 1 << 9)) { /* save the old value */ ++ _SSE2 = 1; ++ } ++ cr4 |= (1 << 9); /* OSFXSR enable SSE2 */ ++ write_cr4(cr4); ++ ++ msr = HWCR; ++ _RDMSR(msr, &lo, &hi); ++ /* FIXME: Why use SSEDIS */ ++ if(lo & (1 << 17)) { /* save the old value */ ++ _Wrap32Dis = 1; ++ } ++ lo |= (1 << 17); /* HWCR.wrap32dis */ ++ lo &= ~(1 << 15); /* SSEDIS */ ++ _WRMSR(msr, lo, hi); /* Setting wrap32dis allows 64-bit memory references in real mode */ ++ ++ _DisableDramECC = mct_DisableDimmEccEn_D(pMCTstat, pDCTstat); ++ ++ Errors = 0; ++ dev = pDCTstat->dev_dct; ++ ++ for (Channel = 0; Channel < 2; Channel++) { ++ print_debug_dqs("\tTrainMaxRdLatency51: Node ", pDCTstat->Node_ID, 1); ++ print_debug_dqs("\tTrainMaxRdLatency51: Channel ", Channel, 1); ++ pDCTstat->Channel = Channel; ++ ++ if (pDCTstat->DIMMValidDCT[Channel] == 0) ++ continue; ++ ++ mem_clk = Get_NB32_DCT(dev, Channel, 0x94) & 0x1f; ++ ++ Receiver = mct_InitReceiver_D(pDCTstat, Channel); ++ ++ /* Find DIMM with worst case receiver enable delays */ ++ current_worst_case_total_delay_dimm = 0; ++ current_worst_case_total_delay_value = 0; ++ ++ /* There are four receiver pairs, loosely associated with chipselects. ++ * This is essentially looping over each DIMM. ++ */ ++ for (; Receiver < 8; Receiver += 2) { ++ Addl_Index = (Receiver >> 1) * 3 + 0x10; ++ dimm = (Receiver >> 1); ++ ++ print_debug_dqs("\t\tTrainMaxRdLatency52: index ", Addl_Index, 2); ++ ++ if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver)) { ++ continue; ++ } ++ ++ /* Retrieve the total delay values from pass 1 of DQS receiver enable training */ ++ read_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg); ++ read_read_dqs_timing_control_registers(current_rdqs_total_delay, dev, Channel, dimm, index_reg); ++ ++ for (lane = 0; lane < 8; lane++) { ++ current_total_delay[lane] += current_rdqs_total_delay[lane]; ++ if (current_total_delay[lane] > current_worst_case_total_delay_value) { ++ current_worst_case_total_delay_dimm = dimm; ++ current_worst_case_total_delay_value = current_total_delay[lane]; ++ } ++ } ++ ++#if DQS_TRAIN_DEBUG > 0 ++ for (lane = 0; lane < 8; lane++) ++ print_debug_dqs_pair("\t\tTrainMaxRdLatency56: Lane ", lane, " current_total_delay ", current_total_delay[lane], 2); ++#endif ++ } ++ ++ /* 2.10.5.8.5.1.1 */ ++ Calc_SetMaxRdLatency_D_Fam15(pMCTstat, pDCTstat, Channel, 1); ++ ++ /* 2.10.5.8.5.1.[2,3] ++ * Write the DRAM training pattern to the test address ++ */ ++ write_dram_dqs_training_pattern_fam15(pMCTstat, pDCTstat, Channel, current_worst_case_total_delay_dimm << 1, 0xff); ++ ++ /* 2.10.5.8.5.1.4 ++ * Incrementally test each MaxRdLatency candidate ++ */ ++ for (; pDCTstat->CH_MaxRdLat[Channel] < 0x3ff; pDCTstat->CH_MaxRdLat[Channel]++) { ++ write_max_read_latency_to_registers(pMCTstat, pDCTstat, Channel, pDCTstat->CH_MaxRdLat[Channel]); ++ read_dram_dqs_training_pattern_fam15(pMCTstat, pDCTstat, Channel, current_worst_case_total_delay_dimm << 1, 0xff); ++ dword = Get_NB32_DCT(dev, Channel, 0x268) & 0x3ffff; ++ if (!dword) ++ break; ++ Set_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000050, 0x13131313); ++ } ++ ++ /* 2.10.5.8.5.1.5 */ ++ nb_pstate = 0; ++ mem_clk = Get_NB32_DCT(dev, Channel, 0x94) & 0x1f; ++ if (fam15h_freq_tab[mem_clk] == 0) { ++ return; ++ } ++ dword = Get_NB32(pDCTstat->dev_nbctl, (0x160 + (nb_pstate * 4))); /* Retrieve NbDid, NbFid */ ++ nb_clk = (200 * (((dword >> 1) & 0x1f) + 0x4)) / (((dword >> 7) & 0x1)?2:1); ++ ++ pDCTstat->CH_MaxRdLat[Channel]++; ++ pDCTstat->CH_MaxRdLat[Channel] += ((((uint64_t)15 * 100000000000ULL) / ((uint64_t)fam15h_freq_tab[mem_clk] * 1000000ULL)) ++ * ((uint64_t)nb_clk * 1000)) / 1000000000ULL; ++ ++ write_max_read_latency_to_registers(pMCTstat, pDCTstat, Channel, pDCTstat->CH_MaxRdLat[Channel]); ++ } ++ ++ if(_DisableDramECC) { ++ mct_EnableDimmEccEn_D(pMCTstat, pDCTstat, _DisableDramECC); ++ } ++ ++ if(!_Wrap32Dis) { ++ msr = HWCR; ++ _RDMSR(msr, &lo, &hi); ++ lo &= ~(1<<17); /* restore HWCR.wrap32dis */ ++ _WRMSR(msr, lo, hi); ++ } ++ if(!_SSE2){ ++ cr4 = read_cr4(); ++ cr4 &= ~(1<<9); /* restore cr4.OSFXSR */ ++ write_cr4(cr4); ++ } ++ ++#if DQS_TRAIN_DEBUG > 0 ++ { ++ u8 ChannelDTD; ++ printk(BIOS_DEBUG, "TrainMaxRdLatency: CH_MaxRdLat:\n"); ++ for(ChannelDTD = 0; ChannelDTD<2; ChannelDTD++) { ++ printk(BIOS_DEBUG, "Channel:%x: %x\n", ++ ChannelDTD, pDCTstat->CH_MaxRdLat[ChannelDTD]); ++ } ++ } ++#endif ++ ++ printk(BIOS_DEBUG, "TrainMaxRdLatency: Status %x\n", pDCTstat->Status); ++ printk(BIOS_DEBUG, "TrainMaxRdLatency: ErrStatus %x\n", pDCTstat->ErrStatus); ++ printk(BIOS_DEBUG, "TrainMaxRdLatency: ErrCode %x\n", pDCTstat->ErrCode); ++ printk(BIOS_DEBUG, "TrainMaxRdLatency: Done\n\n"); ++} ++ + u8 mct_InitReceiver_D(struct DCTStatStruc *pDCTstat, u8 dct) + { + if (pDCTstat->DIMMValidDCT[dct] == 0 ) { +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c b/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c +index 3153e46..28cc8f6 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c +@@ -172,6 +172,8 @@ static void EnterSelfRefresh(struct MCTStatStruc *pMCTstat, + static void ChangeMemClk(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat) + { ++ printk(BIOS_DEBUG, "%s: Start\n", __func__); ++ + uint8_t DCT0Present; + uint8_t DCT1Present; + uint32_t dword; +@@ -313,6 +315,8 @@ static void ChangeMemClk(struct MCTStatStruc *pMCTstat, + mct_Wait(15000); /* Wait for 750us */ + } + } ++ ++ printk(BIOS_DEBUG, "%s: Done\n", __func__); + } + + /* +-- +1.9.1 + |