Diffstat (limited to 'resources/libreboot/patch/kgpe-d16/0033-cpu-amd-Add-initial-AMD-Family-15h-support.patch')
-rw-r--r-- | resources/libreboot/patch/kgpe-d16/0033-cpu-amd-Add-initial-AMD-Family-15h-support.patch | 16206
1 files changed, 16206 insertions, 0 deletions
diff --git a/resources/libreboot/patch/kgpe-d16/0033-cpu-amd-Add-initial-AMD-Family-15h-support.patch b/resources/libreboot/patch/kgpe-d16/0033-cpu-amd-Add-initial-AMD-Family-15h-support.patch new file mode 100644 index 00000000..75aa1952 --- /dev/null +++ b/resources/libreboot/patch/kgpe-d16/0033-cpu-amd-Add-initial-AMD-Family-15h-support.patch @@ -0,0 +1,16206 @@ +From db769f9a54ca4b8a1872c031f29aae31f412e2a2 Mon Sep 17 00:00:00 2001 +From: Timothy Pearson <tpearson@raptorengineeringinc.com> +Date: Fri, 16 Oct 2015 13:51:51 -0500 +Subject: [PATCH 033/139] cpu/amd: Add initial AMD Family 15h support + +TEST: Booted ASUS KGPE-D16 with single Opteron 6380 + * Unbuffered DDR3 DIMMs tested and working + * Suspend to RAM (S3) tested and working + +Conflicts: + + src/cpu/amd/car/disable_cache_as_ram.c + +Change-Id: Idffd2ce36ce183fbfa087e5ba69a9148f084b45e +Signed-off-by: Timothy Pearson <tpearson@raptorengineeringinc.com> +--- + src/cpu/amd/car/cache_as_ram.inc | 130 +- + src/cpu/amd/car/disable_cache_as_ram.c | 77 +- + src/cpu/amd/family_10h-family_15h/defaults.h | 266 +- + src/cpu/amd/family_10h-family_15h/fidvid.c | 235 +- + src/cpu/amd/family_10h-family_15h/init_cpus.c | 232 +- + .../amd/family_10h-family_15h/model_10xxx_init.c | 92 +- + src/cpu/amd/family_10h-family_15h/powernow_acpi.c | 50 +- + src/cpu/amd/family_10h-family_15h/processor_name.c | 194 +- + .../amd/family_10h-family_15h/update_microcode.c | 6 + + src/cpu/amd/quadcore/quadcore.c | 109 +- + src/cpu/amd/quadcore/quadcore_id.c | 43 +- + src/include/cpu/amd/model_10xxx_msr.h | 7 + + src/mainboard/advansus/a785e-i/romstage.c | 2 +- + src/mainboard/amd/bimini_fam10/romstage.c | 2 +- + src/mainboard/amd/mahogany_fam10/romstage.c | 2 +- + .../amd/serengeti_cheetah_fam10/romstage.c | 2 +- + src/mainboard/amd/tilapia_fam10/romstage.c | 2 +- + src/mainboard/asus/kfsn4-dre/romstage.c | 2 +- + src/mainboard/asus/m4a78-em/romstage.c | 2 +- + src/mainboard/asus/m4a785-m/romstage.c | 2 +- + src/mainboard/asus/m5a88-v/romstage.c | 2 +- + src/mainboard/avalue/eax-785e/romstage.c | 2 +- + src/mainboard/gigabyte/ma785gm/romstage.c | 2 +- + src/mainboard/gigabyte/ma785gmt/romstage.c | 2 +- + src/mainboard/gigabyte/ma78gm/romstage.c | 2 +- + src/mainboard/hp/dl165_g6_fam10/romstage.c | 2 +- + src/mainboard/iei/kino-780am2-fam10/romstage.c | 2 +- + src/mainboard/jetway/pa78vm5/romstage.c | 2 +- + src/mainboard/msi/ms9652_fam10/romstage.c | 2 +- + src/mainboard/supermicro/h8dmr_fam10/romstage.c | 2 +- + src/mainboard/supermicro/h8qme_fam10/romstage.c | 2 +- + src/mainboard/supermicro/h8scm_fam10/romstage.c | 2 +- + src/mainboard/tyan/s2912_fam10/romstage.c | 2 +- + src/northbridge/amd/amdfam10/Kconfig | 2 +- + src/northbridge/amd/amdfam10/Makefile.inc | 2 + + src/northbridge/amd/amdfam10/amdfam10.h | 6 +- + src/northbridge/amd/amdfam10/amdfam10_util.c | 13 +- + src/northbridge/amd/amdfam10/link_control.c | 86 + + src/northbridge/amd/amdfam10/misc_control.c | 7 + + src/northbridge/amd/amdfam10/nb_control.c | 85 + + src/northbridge/amd/amdfam10/northbridge.c | 233 +- + src/northbridge/amd/amdfam10/raminit_amdmct.c | 304 +- + src/northbridge/amd/amdht/h3ncmn.c | 171 +- + src/northbridge/amd/amdht/ht_wrapper.c | 43 +- + src/northbridge/amd/amdmct/amddefs.h | 78 +- + src/northbridge/amd/amdmct/mct/mct_d.c | 4 +- + src/northbridge/amd/amdmct/mct/mct_d.h | 20 +- + src/northbridge/amd/amdmct/mct/mctpro_d.c | 21 +- + src/northbridge/amd/amdmct/mct_ddr3/mct_d.c | 3187 ++++++++++++++++---- + src/northbridge/amd/amdmct/mct_ddr3/mct_d.h | 124 +- + 
src/northbridge/amd/amdmct/mct_ddr3/mct_d_gcc.h | 9 + + src/northbridge/amd/amdmct/mct_ddr3/mctardk5.c | 21 +- + src/northbridge/amd/amdmct/mct_ddr3/mctcsi_d.c | 27 +- + src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c | 1087 ++++++- + src/northbridge/amd/amdmct/mct_ddr3/mctecc_d.c | 55 +- + src/northbridge/amd/amdmct/mct_ddr3/mcthdi.c | 7 +- + src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c | 105 +- + src/northbridge/amd/amdmct/mct_ddr3/mctproc.c | 2 +- + src/northbridge/amd/amdmct/mct_ddr3/mctrci.c | 24 +- + src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c | 585 +++- + src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c | 1342 ++++++++- + src/northbridge/amd/amdmct/mct_ddr3/mctsrc1p.c | 10 +- + src/northbridge/amd/amdmct/mct_ddr3/mcttmrl.c | 20 +- + src/northbridge/amd/amdmct/mct_ddr3/mctwl.c | 255 +- + src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c | 1007 +++++-- + src/northbridge/amd/amdmct/mct_ddr3/mutilc_d.c | 69 +- + src/northbridge/amd/amdmct/mct_ddr3/mwlc_d.h | 46 +- + src/northbridge/amd/amdmct/mct_ddr3/s3utils.c | 652 +++- + src/northbridge/amd/amdmct/wrappers/mcti.h | 14 +- + src/northbridge/amd/amdmct/wrappers/mcti_d.c | 42 +- + 70 files changed, 9184 insertions(+), 2064 deletions(-) + create mode 100644 src/northbridge/amd/amdfam10/link_control.c + create mode 100644 src/northbridge/amd/amdfam10/nb_control.c + +diff --git a/src/cpu/amd/car/cache_as_ram.inc b/src/cpu/amd/car/cache_as_ram.inc +index 0b2bc60..6542906 100644 +--- a/src/cpu/amd/car/cache_as_ram.inc ++++ b/src/cpu/amd/car/cache_as_ram.inc +@@ -32,18 +32,23 @@ + #define CacheSizeAPStack CONFIG_DCACHE_AP_STACK_SIZE + + #define MSR_MCFG_BASE 0xC0010058 +-#define MSR_FAM10 0xC001102A ++#define MSR_BU_CFG2 0xC001102A + + #define jmp_if_k8(x) comisd %xmm2, %xmm1; jb x ++#define jmp_if_not_fam15h(x) comisd %xmm3, %xmm1; jb x ++#define jmp_if_fam15h(x) comisd %xmm3, %xmm1; jae x + + #define CPUID_MASK 0x0ff00f00 + #define CPUID_VAL_FAM10_ROTATED 0x0f000010 ++#define CPUID_VAL_FAM15_ROTATED 0x0f000060 + + /* + * XMM map: + * xmm1: CPU family + * xmm2: Fam10h comparison value +- * xmm3: Backup EBX ++ * xmm3: Fam15h comparison value ++ * xmm4: Backup EBX ++ * xmm5: Coreboot init detect + */ + + /* Save the BIST result. */ +@@ -63,7 +68,7 @@ cache_as_ram_setup: + movl %eax, %cr4 + + /* Figure out the CPU family. */ +- cvtsi2sd %ebx, %xmm3 ++ cvtsi2sd %ebx, %xmm4 + movl $0x01, %eax + cpuid + /* Base family is bits 8..11, extended family is bits 20..27. */ +@@ -73,13 +78,16 @@ cache_as_ram_setup: + cvtsi2sd %eax, %xmm1 + movl $CPUID_VAL_FAM10_ROTATED, %eax + cvtsi2sd %eax, %xmm2 +- cvtsd2si %xmm3, %ebx ++ movl $CPUID_VAL_FAM15_ROTATED, %eax ++ cvtsi2sd %eax, %xmm3 ++ cvtsd2si %xmm4, %ebx + + /* Check if cpu_init_detected. */ + movl $MTRR_DEF_TYPE_MSR, %ecx + rdmsr + andl $MTRR_DEF_TYPE_EN, %eax + movl %eax, %ebx /* We store the status. */ ++ cvtsi2sd %ebx, %xmm5 + + jmp_if_k8(CAR_FAM10_out_post_errata) + +@@ -120,21 +128,24 @@ cache_as_ram_setup: + + CAR_FAM10_out: + ++ jmp_if_fam15h(CAR_FAM10_errata_applied) + /* + * Errata 193: Disable clean copybacks to L3 cache to allow cached ROM. + * Re-enable it in after RAM is initialized and before CAR is disabled. + */ +- movl $MSR_FAM10, %ecx ++ movl $MSR_BU_CFG2, %ecx + rdmsr +- bts $15, %eax ++ bts $15, %eax /* Set bit 15 in EDX:EAX (bit 15 in EAX). */ + wrmsr + + /* Erratum 343, RevGuide for Fam10h, Pub#41322 Rev. 3.33 */ +- movl $MSR_FAM10, %ecx ++ movl $MSR_BU_CFG2, %ecx + rdmsr + bts $35-32, %edx /* Set bit 35 in EDX:EAX (bit 3 in EDX). 
*/ + wrmsr + ++CAR_FAM10_errata_applied: ++ + #if CONFIG_MMCONF_SUPPORT + #if (CONFIG_MMCONF_BASE_ADDRESS > 0xFFFFFFFF) + #error "MMCONF_BASE_ADDRESS too big" +@@ -169,6 +180,63 @@ CAR_FAM10_out: + + CAR_FAM10_out_post_errata: + ++ /* Fam15h APIC IDs do not depend on NB config bit 54 */ ++ jmp_if_not_fam15h(skip_nb54_set) ++ movl $0xc001001f, %ecx /* NB_CFG_MSR */ ++ rdmsr ++ bts $(54 - 32), %edx /* Set NB config bit 54 */ ++ wrmsr ++ ++skip_nb54_set: ++ /* On Fam15h CPUs each compute unit's MTRRs are shared between two cores */ ++ jmp_if_not_fam15h(skip_cu_check) ++ ++ /* Get the initial APIC ID. */ ++ movl $1, %eax ++ cpuid ++ movl %ebx, %eax ++ ++ /* Restore init detect */ ++ cvtsd2si %xmm5, %ebx ++ ++ /* Determine if this is the second core to start in a compute unit; if so, wait for first core start, clear init detect and skip MTRR init */ ++ bt $24, %eax ++ jnc skip_cu_check /* First core in the compute unit jumps to skip_cu_check */ ++ ++ /* Determine if this is the second core to start in a compute unit; if so, clear init detect and skip MTRR init */ ++ /* Busywait until the first core sets up the MTRRs */ ++check_init_detect_1: ++ /* Check if cpu_init_detected. */ ++ movl $MTRR_DEF_TYPE_MSR, %ecx ++ rdmsr ++ andl $MTRR_DEF_TYPE_EN, %eax ++ cmp $0x00000000, %eax ++ je check_init_detect_1 /* First core has not yet started */ ++ ++check_init_detect_2: ++ movl $SYSCFG_MSR, %ecx ++ rdmsr ++ andl $(SYSCFG_MSR_MtrrFixDramEn | SYSCFG_MSR_MtrrVarDramEn), %eax ++ cmp $0x00000000, %eax ++ je check_init_detect_2 /* First core has not yet started */ ++ ++ /* First core has now started */ ++ movl $0x00000000, %ebx /* Clear init detect flag */ ++ cvtsi2sd %ebx, %xmm5 ++ jmp fam10_mtrr_setup_complete ++ ++skip_cu_check: ++ ++ jmp_if_not_fam15h(CAR_FAM15_errata_applied) ++ ++ /* Erratum 714, RevGuide for Fam15h, Pub#48063 Rev. 3.24 */ ++ movl $MSR_BU_CFG2, %ecx ++ rdmsr ++ bts $8, %eax /* Set bit 8 in EDX:EAX (bit 8 in EAX). */ ++ wrmsr ++ ++CAR_FAM15_errata_applied: ++ + /* Set MtrrFixDramModEn for clear fixed MTRR. */ + enable_fixed_mtrr_dram_modify: + movl $SYSCFG_MSR, %ecx +@@ -337,8 +405,42 @@ wbcache_post_fam10_setup: + orl $(SYSCFG_MSR_MtrrVarDramEn | SYSCFG_MSR_MtrrFixDramEn), %eax + wrmsr + ++fam10_mtrr_setup_complete: + post_code(0xa1) + ++ /* Disable conversion of INVD to WBINVD (INVDWBINVD = 0) */ ++ mov $0xc0010015, %ecx ++ rdmsr ++ btr $4, %eax ++ wrmsr ++ ++jmp_if_not_fam15h(fam15_car_msr_setup_complete) ++ /* Disable streaming store (DisSS = 1) */ ++ mov $0xc0011020, %ecx ++ rdmsr ++ bts $28, %eax ++ wrmsr ++ ++ /* Disable speculative ITLB reloads (DisSpecTlbRld = 1) */ ++ mov $0xc0011021, %ecx ++ rdmsr ++ bts $9, %eax ++ wrmsr ++ ++ /* Disable speculative DTLB reloads (DisSpecTlbRld = 1) and set DisHwPf = 1 */ ++ mov $0xc0011022, %ecx ++ rdmsr ++ bts $4, %eax ++ bts $13, %eax ++ wrmsr ++ ++ /* Disable CR0 combining (CombineCr0Cd = 0) */ ++ mov $0xc001102b, %ecx ++ rdmsr ++ btr $49-32, %edx ++ wrmsr ++fam15_car_msr_setup_complete: ++ + /* Enable cache. */ + movl %cr0, %eax + andl $(~(CR0_CacheDisable | CR0_NoWriteThrough)), %eax +@@ -393,9 +495,6 @@ CAR_FAM10_ap: + * to reverse it. + */ + +- /* Store our init detected. */ +- movl %ebx, %esi +- + /* Get the coreid bits at first. 
*/ + movl $0x80000008, %eax + cpuid +@@ -414,6 +513,8 @@ CAR_FAM10_ap: + movl %edi, %ecx /* CoreID bits */ + bt $(54 - 32), %edx + jc roll_cfg ++ ++ /* Fam10h NB config bit 54 was not set */ + rolb %cl, %bl + roll_cfg: + +@@ -423,8 +524,8 @@ roll_cfg: + movl $(CacheBase + (CacheSize - (CacheSizeBSPStack + CacheSizeBSPSlush))), %esp + subl %eax, %esp + +- /* Retrive init detected. */ +- movl %esi, %ebx ++ /* Restore init detect */ ++ cvtsd2si %xmm5, %ebx + + post_code(0xa4) + +@@ -437,6 +538,8 @@ CAR_FAM10_ap_out: + andl $~(3 << 9), %eax + movl %eax, %cr4 + ++ post_code(0xa6) ++ + /* Restore the BIST result. */ + movl %ebp, %eax + +@@ -444,6 +547,9 @@ CAR_FAM10_ap_out: + movl %esp, %ebp + pushl %ebx /* Init detected. */ + pushl %eax /* BIST */ ++ ++ post_code(0xa7) ++ + call cache_as_ram_main + + /* We will not go back. */ +diff --git a/src/cpu/amd/car/disable_cache_as_ram.c b/src/cpu/amd/car/disable_cache_as_ram.c +index 5eccf79..5cab544 100644 +--- a/src/cpu/amd/car/disable_cache_as_ram.c ++++ b/src/cpu/amd/car/disable_cache_as_ram.c +@@ -19,7 +19,7 @@ + * along with this program; if not, write to the Free Software + * Foundation, Inc. + * +- * be warned, this file will be used other cores and core 0 / node 0 ++ * WARNING: this file will be used by both any AP cores and core 0 / node 0 + */ + + #include <cpu/x86/cache.h> +@@ -34,41 +34,78 @@ static inline __attribute__((always_inline)) uint32_t amd_fam1x_cpu_family(void) + return family; + } + +-static inline __attribute__((always_inline)) void disable_cache_as_ram(void) ++static inline __attribute__((always_inline)) void disable_cache_as_ram(uint8_t skip_sharedc_config) + { + msr_t msr; ++ uint32_t family; + +- /* disable cache */ +- write_cr0(read_cr0() | CR0_CacheDisable); ++ if (!skip_sharedc_config) { ++ /* disable cache */ ++ write_cr0(read_cr0() | CR0_CacheDisable); + +- msr.lo = 0; +- msr.hi = 0; +- wrmsr(MTRR_FIX_4K_C8000, msr); ++ msr.lo = 0; ++ msr.hi = 0; ++ wrmsr(MTRR_FIX_4K_C8000, msr); + #if CONFIG_DCACHE_RAM_SIZE > 0x8000 +- wrmsr(MTRR_FIX_4K_C0000, msr); ++ wrmsr(MTRR_FIX_4K_C0000, msr); + #endif + #if CONFIG_DCACHE_RAM_SIZE > 0x10000 +- wrmsr(MTRR_FIX_4K_D0000, msr); ++ wrmsr(MTRR_FIX_4K_D0000, msr); + #endif + #if CONFIG_DCACHE_RAM_SIZE > 0x18000 +- wrmsr(MTRR_FIX_4K_D8000, msr); ++ wrmsr(MTRR_FIX_4K_D8000, msr); + #endif +- /* disable fixed mtrr from now on, it will be enabled by ramstage again*/ ++ /* disable fixed mtrr from now on, it will be enabled by ramstage again */ ++ msr = rdmsr(SYSCFG_MSR); ++ msr.lo &= ~(SYSCFG_MSR_MtrrFixDramEn | SYSCFG_MSR_MtrrFixDramModEn); ++ wrmsr(SYSCFG_MSR, msr); ++ ++ /* Set the default memory type and disable fixed and enable variable MTRRs */ ++ msr.hi = 0; ++ msr.lo = (1 << 11); ++ ++ wrmsr(MTRR_DEF_TYPE_MSR, msr); ++ ++ enable_cache(); ++ } ++ ++ /* INVDWBINVD = 1 */ ++ msr = rdmsr(0xc0010015); ++ msr.lo |= (0x1 << 4); ++ wrmsr(0xc0010015, msr); ++ ++ family = amd_fam1x_cpu_family(); ++ ++ if (family >= 0x6f) { ++ /* Family 15h or later */ + +- msr = rdmsr(SYSCFG_MSR); +- msr.lo &= ~(SYSCFG_MSR_MtrrFixDramEn | SYSCFG_MSR_MtrrFixDramModEn); +- wrmsr(SYSCFG_MSR, msr); ++ /* DisSS = 0 */ ++ msr = rdmsr(0xc0011020); ++ msr.lo &= ~(0x1 << 28); ++ wrmsr(0xc0011020, msr); + +- /* Set the default memory type and disable fixed and enable variable MTRRs */ +- msr.hi = 0; +- msr.lo = (1 << 11); ++ if (!skip_sharedc_config) { ++ /* DisSpecTlbRld = 0 */ ++ msr = rdmsr(0xc0011021); ++ msr.lo &= ~(0x1 << 9); ++ wrmsr(0xc0011021, msr); + +- wrmsr(MTRR_DEF_TYPE_MSR, msr); ++ /* Erratum 714: 
SpecNbReqDis = 0 */ ++ msr = rdmsr(BU_CFG2_MSR); ++ msr.lo &= ~(0x1 << 8); ++ wrmsr(BU_CFG2_MSR, msr); ++ } + +- enable_cache(); ++ /* DisSpecTlbRld = 0 */ ++ /* DisHwPf = 0 */ ++ msr = rdmsr(0xc0011022); ++ msr.lo &= ~(0x1 << 4); ++ msr.lo &= ~(0x1 << 13); ++ wrmsr(0xc0011022, msr); ++ } + } + + static void disable_cache_as_ram_bsp(void) + { +- disable_cache_as_ram(); ++ disable_cache_as_ram(0); + } +diff --git a/src/cpu/amd/family_10h-family_15h/defaults.h b/src/cpu/amd/family_10h-family_15h/defaults.h +index 6fd1a7e..24f87ba 100644 +--- a/src/cpu/amd/family_10h-family_15h/defaults.h ++++ b/src/cpu/amd/family_10h-family_15h/defaults.h +@@ -2,6 +2,7 @@ + * This file is part of the coreboot project. + * + * Copyright (C) 2008 Advanced Micro Devices, Inc. ++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -25,41 +26,65 @@ + */ + static const struct { + u32 msr; +- u32 revision; ++ uint64_t revision; + u32 platform; + u32 data_lo; + u32 data_hi; + u32 mask_lo; + u32 mask_hi; + } fam10_msr_default[] = { +- { TOP_MEM2, AMD_FAM10_ALL, AMD_PTYPE_ALL, ++ { TOP_MEM2, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, + 0x00000000, 0x00000000, + 0xFFFFFFFF, 0xFFFFFFFF }, + +- { SYSCFG, AMD_FAM10_ALL, AMD_PTYPE_ALL, ++ { SYSCFG, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, + 3 << 21, 0x00000000, + 3 << 21, 0x00000000 }, /* [MtrrTom2En]=1,[TOM2EnWB] = 1*/ + +- { HWCR, AMD_FAM10_ALL, AMD_PTYPE_ALL, +- 1 << 4, 0x00000000, +- 1 << 4, 0x00000000 }, /* [INVD_WBINVD]=1 */ ++ { MC1_CTL_MASK, AMD_OR_B2, AMD_PTYPE_ALL, ++ 1 << 18, 0x00000000, ++ 1 << 18, 0x00000000 }, /* Erratum 586: [DEIBP]=1 */ + +- { MC4_CTL_MASK, AMD_FAM10_ALL, AMD_PTYPE_ALL, ++ { MC1_CTL_MASK, AMD_OR_B2, AMD_PTYPE_ALL, ++ 1 << 15, 0x00000000, ++ 1 << 15, 0x00000000 }, /* Erratum 593: [BSRP]=1 */ ++ ++ { MC1_CTL_MASK, AMD_OR_C0, AMD_PTYPE_ALL, ++ 1 << 15, 0x00000000, ++ 1 << 15, 0x00000000 }, /* Erratum 739: [BSRP]=1 */ ++ ++ { 0xc0011000, AMD_FAM15_ALL, AMD_PTYPE_ALL, ++ 1 << 16, 0x00000000, ++ 1 << 16, 0x00000000 }, /* Erratum 608: [bit 16]=1 */ ++ ++ { 0xc0011000, AMD_OR_C0, AMD_PTYPE_ALL, ++ 1 << 15, 0x00000000, ++ 1 << 15, 0x00000000 }, /* Erratum 727: [bit 15]=1 */ ++ ++ { MC4_CTL_MASK, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, + 0xF << 19, 0x00000000, + 0xF << 19, 0x00000000 }, /* [RtryHt[0..3]]=1 */ + ++ { MC4_CTL_MASK, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, ++ 1 << 10, 0x00000000, ++ 1 << 10, 0x00000000 }, /* [GartTblWkEn]=1 */ ++ + { DC_CFG, AMD_FAM10_ALL, AMD_PTYPE_SVR, + 0x00000000, 0x00000004, +- 0x00000000, 0x0000000C }, /* [REQ_CTR] = 1 for Server */ ++ 0x00000000, 0x0000000C }, /* Family 10h: [REQ_CTR] = 1 for Server */ + + { DC_CFG, AMD_DR_Bx, AMD_PTYPE_SVR, + 0x00000000, 0x00000000, + 0x00000000, 0x00000C00 }, /* Erratum 326 */ + +- { NB_CFG, AMD_FAM10_ALL, AMD_PTYPE_DC | AMD_PTYPE_MC, ++ { NB_CFG, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_DC | AMD_PTYPE_MC, + 0x00000000, 1 << 22, + 0x00000000, 1 << 22 }, /* [ApicInitIDLo]=1 */ + ++ { NB_CFG, AMD_FAM15_ALL, AMD_PTYPE_DC | AMD_PTYPE_MC, ++ 1 << 23, 0x00000000, ++ 1 << 23, 0x00000000 }, /* Erratum 663: [bit 23]=1 */ ++ + { BU_CFG2, AMD_DR_Bx, AMD_PTYPE_ALL, + 1 << 29, 0x00000000, + 1 << 29, 0x00000000 }, /* For Bx Smash1GPages=1 */ +@@ -72,6 +97,14 @@ static const struct { + 0 << 1, 0x00000000, + 1 << 1, 0x00000000 }, /* IDX_MATCH_ALL=0 */ + ++ { IC_CFG, 
AMD_OR_C0, AMD_PTYPE_ALL, ++ 0x00000000, 1 << (39-32), ++ 0x00000000, 1 << (39-32)}, /* C0 or above [DisLoopPredictor]=1 */ ++ ++ { IC_CFG, AMD_OR_C0, AMD_PTYPE_ALL, ++ 0xf << 1, 0x00000000, ++ 0xf << 1, 0x00000000}, /* C0 or above [DisIcWayFilter]=0xf */ ++ + { BU_CFG, AMD_DR_LT_B3, AMD_PTYPE_ALL, + 1 << 21, 0x00000000, + 1 << 21, 0x00000000 }, /* Erratum #254 DR B1 BU_CFG[21]=1 */ +@@ -80,19 +113,51 @@ static const struct { + 1 << 23, 0x00000000, + 1 << 23, 0x00000000 }, /* Erratum #309 BU_CFG[23]=1 */ + ++ { BU_CFG, AMD_FAM15_ALL, AMD_PTYPE_ALL, ++ 0 << 10, 0x00000000, ++ 1 << 10, 0x00000000 }, /* [DcacheAgressivePriority]=0 */ ++ + /* CPUID_EXT_FEATURES */ +- { CPUIDFEATURES, AMD_FAM10_ALL, AMD_PTYPE_DC | AMD_PTYPE_MC, ++ { CPUIDFEATURES, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_DC | AMD_PTYPE_MC, + 1 << 28, 0x00000000, + 1 << 28, 0x00000000 }, /* [HyperThreadFeatEn]=1 */ + +- { CPUIDFEATURES, AMD_FAM10_ALL, AMD_PTYPE_DC, ++ { CPUIDFEATURES, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_DC, + 0x00000000, 1 << (33-32), + 0x00000000, 1 << (33-32) }, /* [ExtendedFeatEn]=1 */ + ++ { DE_CFG, AMD_OR_B2, AMD_PTYPE_ALL, ++ 1 << 10, 0x00000000, ++ 1 << 10, 0x00000000 }, /* Bx [ResyncPredSingleDispDis]=1 */ ++ + { BU_CFG2, AMD_DRBH_Cx, AMD_PTYPE_ALL, + 0x00000000, 1 << (35-32), + 0x00000000, 1 << (35-32) }, /* Erratum 343 (set to 0 after CAR, in post_cache_as_ram()/model_10xxx_init() ) */ + ++ { BU_CFG3, AMD_OR_B2, AMD_PTYPE_ALL, ++ 0x00000000, 1 << (42-32), ++ 0x00000000, 1 << (42-32)}, /* Bx [PwcDisableWalkerSharing]=1 */ ++ ++ { BU_CFG3, AMD_OR_C0, AMD_PTYPE_ALL, ++ 1 << 22, 0x00000000, ++ 1 << 22, 0x00000000}, /* C0 or above [PfcDoubleStride]=1 */ ++ ++ { EX_CFG, AMD_OR_C0, AMD_PTYPE_ALL, ++ 0x00000000, 1 << (54-32), ++ 0x00000000, 1 << (54-32)}, /* C0 or above [LateSbzResync]=1 */ ++ ++ { LS_CFG2, AMD_OR_C0, AMD_PTYPE_ALL, ++ 1 << 23, 0x00000000, ++ 1 << 23, 0x00000000}, /* C0 or above [DisScbThreshold]=1 */ ++ ++ { LS_CFG2, AMD_OR_C0, AMD_PTYPE_ALL, ++ 1 << 14, 0x00000000, ++ 1 << 14, 0x00000000}, /* C0 or above [ForceSmcCheckFlowStDis]=1 */ ++ ++ { LS_CFG2, AMD_OR_C0, AMD_PTYPE_ALL, ++ 1 << 12, 0x00000000, ++ 1 << 12, 0x00000000}, /* C0 or above [ForceBusLockDis]=1 */ ++ + { OSVW_ID_Length, AMD_DR_Bx | AMD_DR_Cx | AMD_DR_Dx, AMD_PTYPE_ALL, + 0x00000004, 0x00000000, + 0x00000004, 0x00000000}, /* B0 or Above, OSVW_ID_Length is 0004h */ +@@ -105,9 +170,45 @@ static const struct { + 0x00000000, 1 << (50-32), + 0x00000000, 1 << (50-32)}, /* D0 or Above, RdMmExtCfgQwEn*/ + ++ { BU_CFG2, AMD_FAM15_ALL, AMD_PTYPE_ALL, ++ 0x00000000, 0x0 << (36-32), ++ 0x00000000, 0x3 << (36-32)}, /* [ThrottleNbInterface]=0 */ ++ ++ { BU_CFG2, AMD_FAM15_ALL, AMD_PTYPE_ALL, ++ 1 << 10, 0x00000000, ++ 1 << 10, 0x00000000}, /* [VicResyncChkEn]=1 */ ++ ++ { BU_CFG2, AMD_FAM15_ALL, AMD_PTYPE_ALL, ++ 1 << 11, 0x00000000, ++ 1 << 11, 0x00000000}, /* Erratum 503: [bit 11]=1 */ ++ + { CPU_ID_EXT_FEATURES_MSR, AMD_DR_Dx, AMD_PTYPE_ALL, + 0x00000000, 1 << (51 - 32), + 0x00000000, 1 << (51 - 32)}, /* G34_PKG | C32_PKG | S1G4_PKG | ASB2_PKG */ ++ ++ { CPU_ID_EXT_FEATURES_MSR, AMD_FAM15_ALL, AMD_PTYPE_ALL, ++ 0x00000000, 1 << (56 - 32), ++ 0x00000000, 1 << (56 - 32)}, /* [PerfCtrExtNB]=1 */ ++ ++ { CPU_ID_EXT_FEATURES_MSR, AMD_FAM15_ALL, AMD_PTYPE_ALL, ++ 0x00000000, 1 << (55 - 32), ++ 0x00000000, 1 << (55 - 32)}, /* [PerfCtrExtCore]=1 */ ++ ++ { IBS_OP_DATA3, AMD_FAM15_ALL, AMD_PTYPE_ALL, ++ 0 << 16, 0x00000000, ++ 1 << 16, 0x00000000}, /* [IbsDcMabHit]=0 */ ++ ++ { MC4_MISC0, AMD_FAM15_ALL, AMD_PTYPE_ALL, ++ 0x00000000, 0x1 << 
(52-32), ++ 0x00000000, 0xf << (52-32)}, /* [LvtOffset]=1 */ ++ ++ { MC4_MISC1, AMD_FAM15_ALL, AMD_PTYPE_ALL, ++ 0x00000000, 0x1 << (52-32), ++ 0x00000000, 0xf << (52-32)}, /* [LvtOffset]=1 */ ++ ++ { MC4_MISC2, AMD_FAM15_ALL, AMD_PTYPE_ALL, ++ 0x00000000, 0x1 << (52-32), ++ 0x00000000, 0xf << (52-32)}, /* [LvtOffset]=1 */ + }; + + +@@ -117,37 +218,46 @@ static const struct { + static const struct { + u8 function; + u16 offset; +- u32 revision; ++ uint64_t revision; + u32 platform; + u32 data; + u32 mask; + } fam10_pci_default[] = { + + /* Function 0 - HT Config */ ++ { 0, 0x68, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, ++ 0x000e0000, 0x000e0000 }, /* [19:17] for 8bit APIC config */ ++ ++ { 0, 0x68, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, ++ 0x00400000, 0x00600000 }, /* [22:21] DsNpReqLmt = 10b */ + +- { 0, 0x68, AMD_FAM10_ALL, AMD_PTYPE_ALL, +- 0x004E4800, 0x006E6800 }, /* [19:17] for 8bit APIC config, +- [14:13] BufPriRel = 2h [11] RspPassPW set, +- [22:21] DsNpReqLmt = 10b */ ++ { 0, 0x68, AMD_FAM10_LT_D, AMD_PTYPE_ALL, ++ 0x00004000, 0x00006000 }, /* [14:13] BufRelPri = 2h */ ++ ++ { 0, 0x68, (AMD_FAM10_REV_D | AMD_FAM15_ALL), AMD_PTYPE_ALL, ++ 0x00002000, 0x00006000 }, /* [14:13] BufRelPri = 1h */ ++ ++ { 0, 0x68, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, ++ 0x00000800, 0x00000800 }, /* [11] RspPassPW = 1 */ + + /* Errata 281 Workaround */ + { 0, 0x68, (AMD_DR_B0 | AMD_DR_B1), + AMD_PTYPE_SVR, 0x00200000, 0x00600000 }, /* [22:21] DsNpReqLmt0 = 01b */ + +- { 0, 0x84, AMD_FAM10_ALL, AMD_PTYPE_ALL, ++ { 0, 0x84, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, + 0x00002000, 0x00002000 }, /* [13] LdtStopTriEn = 1 */ + +- { 0, 0xA4, AMD_FAM10_ALL, AMD_PTYPE_ALL, ++ { 0, 0xA4, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, + 0x00002000, 0x00002000 }, /* [13] LdtStopTriEn = 1 */ + +- { 0, 0xC4, AMD_FAM10_ALL, AMD_PTYPE_ALL, ++ { 0, 0xC4, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, + 0x00002000, 0x00002000 }, /* [13] LdtStopTriEn = 1 */ + +- { 0, 0xE4, AMD_FAM10_ALL, AMD_PTYPE_ALL, ++ { 0, 0xE4, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, + 0x00002000, 0x00002000 }, /* [13] LdtStopTriEn = 1 */ + + /* Link Global Retry Control Register */ +- { 0, 0x150, AMD_FAM10_ALL, AMD_PTYPE_ALL, ++ { 0, 0x150, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, + 0x00073900, 0x00073F00 }, + + /* Errata 351 +@@ -172,13 +282,39 @@ static const struct { + 0x00000000, 0x00000100 }, + { 0, 0x18C, AMD_FAM10_ALL, AMD_PTYPE_ALL, + 0x00000000, 0x00000100 }, +- { 0, 0x170, AMD_FAM10_ALL, AMD_PTYPE_ALL, +- 0x00000000, 0x00000100 }, + + /* Link Global Extended Control Register */ + { 0, 0x16C, AMD_FAM10_ALL, AMD_PTYPE_ALL, + 0x00000014, 0x0000003F }, /* [15:13] ForceFullT0 = 0b, +- * Set T0Time 14h per BKDG */ ++ * Set T0Time 14h per BKDG */ ++ ++ { 0, 0x170, AMD_FAM15_ALL, AMD_PTYPE_ALL, ++ 0x00000100, 0x00000100 }, ++ { 0, 0x174, AMD_FAM15_ALL, AMD_PTYPE_ALL, ++ 0x00000100, 0x00000100 }, ++ { 0, 0x178, AMD_FAM15_ALL, AMD_PTYPE_ALL, ++ 0x00000100, 0x00000100 }, ++ { 0, 0x17C, AMD_FAM15_ALL, AMD_PTYPE_ALL, ++ 0x00000100, 0x00000100 }, ++ { 0, 0x180, AMD_FAM15_ALL, AMD_PTYPE_ALL, ++ 0x00000100, 0x00000100 }, ++ { 0, 0x184, AMD_FAM15_ALL, AMD_PTYPE_ALL, ++ 0x00000100, 0x00000100 }, ++ { 0, 0x188, AMD_FAM15_ALL, AMD_PTYPE_ALL, ++ 0x00000100, 0x00000100 }, ++ { 0, 0x18C, AMD_FAM15_ALL, AMD_PTYPE_ALL, ++ 0x00000100, 0x00000100 }, ++ ++ /* Link Global Extended Control Register */ ++ { 0, 0x16C, AMD_FAM15_ALL, AMD_PTYPE_ALL, ++ 0x00000014, 0x0000003F }, /* [15:13] ForceFullT0 = 111b, ++ * Set T0Time 
26h per BKDG */ ++ ++ { 0, 0x16C, AMD_FAM15_ALL, AMD_PTYPE_ALL, ++ 0x7 << 13, 0x7 << 13 }, /* [15:13] ForceFullT0 = 7h */ ++ ++ { 0, 0x16C, AMD_FAM15_ALL, AMD_PTYPE_ALL, ++ 0x26, 0x3f }, /* [5:0] T0Time = 26h */ + + + /* Function 1 - Map Init */ +@@ -205,10 +341,10 @@ static const struct { + /* Function 2 - DRAM Controller */ + + /* Function 3 - Misc. Control */ +- { 3, 0x40, AMD_FAM10_ALL, AMD_PTYPE_ALL, ++ { 3, 0x40, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, + 0x00000100, 0x00000100 }, /* [8] MstrAbrtEn */ + +- { 3, 0x44, AMD_FAM10_ALL, AMD_PTYPE_ALL, ++ { 3, 0x44, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, + 0x4A30005C, 0x4A30005C }, /* [30] SyncOnDramAdrParErrEn = 1, + [27] NbMcaToMstCpuEn = 1, + [25] DisPciCfgCpuErrRsp = 1, +@@ -220,8 +356,12 @@ static const struct { + [2] SyncOnUcEccEn = 1 */ + + /* XBAR buffer settings */ +- { 3, 0x6C, AMD_FAM10_ALL, AMD_PTYPE_ALL, +- 0x00018052, 0x700780F7 }, ++ { 3, 0x6c, AMD_FAM10_ALL, AMD_PTYPE_ALL, ++ 0x00018052, 0x700780f7 }, ++ ++ /* XBAR buffer settings */ ++ { 3, 0x6c, AMD_FAM15_ALL, AMD_PTYPE_ALL, ++ 0x10010052, 0x700700f7 }, + + /* Errata 281 Workaround */ + { 3, 0x6C, ( AMD_DR_B0 | AMD_DR_B1), +@@ -233,12 +373,18 @@ static const struct { + { 3, 0x70, AMD_FAM10_ALL, AMD_PTYPE_ALL, + 0x00041153, 0x777777F7 }, + ++ { 3, 0x70, AMD_FAM15_ALL, AMD_PTYPE_ALL, ++ 0x10171155, 0x777777f7 }, ++ + { 3, 0x70, AMD_FAM10_ALL, AMD_PTYPE_UMA, + 0x61221151, 0x777777F7 }, + + { 3, 0x74, AMD_FAM10_ALL, AMD_PTYPE_UMA, + 0x00080101, 0x000F7777 }, + ++ { 3, 0x74, AMD_FAM15_ALL, AMD_PTYPE_ALL, ++ 0x00172111, 0x77ff7777 }, ++ + { 3, 0x7C, AMD_FAM10_ALL, AMD_PTYPE_ALL, + 0x00090914, 0x707FFF1F }, + +@@ -246,12 +392,18 @@ static const struct { + { 3, 0x7C, ( AMD_DR_B0 | AMD_DR_B1), + AMD_PTYPE_SVR, 0x00144514, 0x707FFF1F }, + ++ { 3, 0x7C, AMD_FAM15_ALL, AMD_PTYPE_ALL, ++ 0x040d0f16, 0x07ffff1f }, ++ + { 3, 0x7C, AMD_FAM10_ALL, AMD_PTYPE_UMA, + 0x00070814, 0x007FFF1F }, + + { 3, 0x140, AMD_FAM10_ALL, AMD_PTYPE_ALL, + 0x00800756, 0x00F3FFFF }, + ++ { 3, 0x140, AMD_FAM15_ALL, AMD_PTYPE_ALL, ++ 0x00a11755, 0x00f3ffff }, ++ + { 3, 0x140, AMD_FAM10_ALL, AMD_PTYPE_UMA, + 0x00C37756, 0x00F3FFFF }, + +@@ -263,6 +415,9 @@ static const struct { + AMD_PTYPE_SVR, 0x00000001, 0x0000000F }, + /* [3:0] RspTok = 0001b */ + ++ { 3, 0x144, AMD_FAM15_ALL, AMD_PTYPE_ALL, ++ 0x00000028, 0x000000ff }, ++ + { 3, 0x148, AMD_FAM10_ALL, AMD_PTYPE_UMA, + 0x8000052A, 0xD5FFFFFF }, + +@@ -270,41 +425,53 @@ static const struct { + { 3, 0x80, AMD_FAM10_ALL, AMD_PTYPE_ALL, + 0xE6002200, 0xFFFFFFFF }, + ++ /* ACPI Power State Control Reg1 */ ++ { 3, 0x80, AMD_FAM15_ALL, AMD_PTYPE_ALL, ++ 0xe20be200, 0xefefef00 }, ++ + /* ACPI Power State Control Reg2 */ + { 3, 0x84, AMD_FAM10_ALL, AMD_PTYPE_ALL, + 0xA0E641E6, 0xFFFFFFFF }, + ++ /* ACPI Power State Control Reg2 */ ++ { 3, 0x84, AMD_FAM15_ALL, AMD_PTYPE_ALL, ++ 0x01e200e2, 0xefef00ef }, ++ + { 3, 0xA0, AMD_FAM10_ALL, AMD_PTYPE_MOB | AMD_PTYPE_DSK, + 0x00000080, 0x00000080 }, /* [7] PSIVidEnable */ + + { 3, 0xA0, AMD_DR_Bx, AMD_PTYPE_ALL, + 0x00002800, 0x000003800 }, /* [13:11] PllLockTime = 5 */ + +- { 3, 0xA0, (AMD_FAM10_ALL & ~(AMD_DR_Bx)), AMD_PTYPE_ALL, ++ { 3, 0xA0, ((AMD_FAM10_ALL | AMD_FAM15_ALL) & ~(AMD_DR_Bx)), AMD_PTYPE_ALL, + 0x00000800, 0x000003800 }, /* [13:11] PllLockTime = 1 */ + + /* Reported Temp Control Register */ +- { 3, 0xA4, AMD_FAM10_ALL, AMD_PTYPE_ALL, ++ { 3, 0xA4, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, + 0x00000080, 0x00000080 }, /* [7] TempSlewDnEn = 1 */ + + /* Clock Power/Timing Control 0 Register */ 
+- { 3, 0xD4, AMD_FAM10_ALL, AMD_PTYPE_ALL, ++ { 3, 0xD4, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, + 0xC0000F00, 0xF0000F00 }, /* [31] NbClkDivApplyAll = 1, + [30:28] NbClkDiv = 100b,[11:8] ClkRampHystSel = 1111b */ + + /* Clock Power/Timing Control 1 Register */ ++ { 3, 0xD8, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, ++ 0x03000010, 0x0F000070 }, /* [6:4] VSRampTime = 1, ++ * [27:24] ReConDel = 3 */ ++ ++ /* Clock Power/Timing Control 1 Register */ + { 3, 0xD8, AMD_FAM10_ALL, AMD_PTYPE_ALL, +- 0x03000016, 0x0F000077 }, /* [6:4] VSRampTime = 1, +- [2:0] VSSlamTime = 6, [27:24] ReConDel = 3 */ ++ 0x00000006, 0x00000007 }, /* [2:0] VSSlamTime = 6 */ + + + /* Clock Power/Timing Control 2 Register */ +- { 3, 0xDC, AMD_FAM10_ALL, AMD_PTYPE_ALL, ++ { 3, 0xDC, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, + 0x00005000, 0x00007000 }, /* [14:12] NbsynPtrAdj = 5 */ + + + /* Extended NB MCA Config Register */ +- { 3, 0x180, AMD_FAM10_ALL, AMD_PTYPE_ALL, ++ { 3, 0x180, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, + 0x007003E2, 0x007003E2 }, /* [22:20] = SyncFloodOn_Err = 7, + [9] SyncOnUncNbAryEn = 1 , + [8] SyncOnProtEn = 1, +@@ -319,12 +486,17 @@ static const struct { + 0x00400000, 0x00400000 }, + + /* L3 Control Register */ +- { 3, 0x1B8, AMD_FAM10_ALL, AMD_PTYPE_ALL, ++ { 3, 0x1b8, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, + 0x00001000, 0x00001000 }, /* [12] = L3PrivReplEn */ + + /* IBS Control Register */ +- { 3, 0x1CC, AMD_FAM10_ALL, AMD_PTYPE_ALL, ++ { 3, 0x1cc, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, + 0x00000100, 0x00000100 }, /* [8] = LvtOffsetVal */ ++ ++ /* Erratum 619 - Family 15h Bx ++ * System software should set F5x88[14] to 1b. */ ++ { 5, 0x88, AMD_OR_B2, AMD_PTYPE_ALL, ++ 1 << 14, 1 << 14 }, + }; + + +@@ -333,7 +505,7 @@ static const struct { + */ + static const struct { + u16 htreg; /* HT Phy Register index */ +- u32 revision; ++ uint64_t revision; + u32 platform; + u32 linktype; + u32 data; +@@ -442,38 +614,38 @@ static const struct { + { 0x530A, AMD_DR_ALL, AMD_PTYPE_ALL, HTPHY_LINKTYPE_ALL, + 0x00004400, 0x00006400 }, /* HT_PHY_DLL_REG */ + +- { 0xCF, AMD_FAM10_ALL, AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT3, ++ { 0xCF, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT3, + 0x00000000, 0x000000FF }, /* Provide clear setting for logical + completeness */ + +- { 0xDF, AMD_FAM10_ALL, AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT3, ++ { 0xDF, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT3, + 0x00000000, 0x000000FF }, /* Provide clear setting for logical + completeness */ + +- { 0xCF, AMD_FAM10_ALL, AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT1, ++ { 0xCF, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT1, + 0x0000006D, 0x000000FF }, /* HT_PHY_HT1_FIFO_PTR_OPT_VALUE */ + +- { 0xDF, AMD_FAM10_ALL, AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT1, ++ { 0xDF, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT1, + 0x0000006D, 0x000000FF }, /* HT_PHY_HT1_FIFO_PTR_OPT_VALUE */ + + /* Link Phy Receiver Loop Filter Registers */ +- { 0xD1, AMD_FAM10_ALL, AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT3, ++ { 0xD1, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT3, + 0x08040000, 0x3FFFC000 }, /* [29:22] LfcMax = 20h, + [21:14] LfcMin = 10h */ + +- { 0xC1, AMD_FAM10_ALL, AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT3, ++ { 0xC1, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT3, + 0x08040000, 0x3FFFC000 }, /* [29:22] LfcMax = 20h, + [21:14] LfcMin = 10h */ + +- { 0xD1, AMD_FAM10_ALL, AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT1, ++ { 0xD1, 
(AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT1, + 0x04020000, 0x3FFFC000 }, /* [29:22] LfcMax = 10h, + [21:14] LfcMin = 08h */ + +- { 0xC1, AMD_FAM10_ALL, AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT1, ++ { 0xC1, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT1, + 0x04020000, 0x3FFFC000 }, /* [29:22] LfcMax = 10h, + [21:14] LfcMin = 08h */ + +- { 0xC0, AMD_FAM10_ALL, AMD_PTYPE_ALL, HTPHY_LINKTYPE_ALL, ++ { 0xC0, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, HTPHY_LINKTYPE_ALL, + 0x40040000, 0xe01F0000 }, /* [31:29] RttCtl = 02h, + [20:16] RttIndex = 04h */ + }; +diff --git a/src/cpu/amd/family_10h-family_15h/fidvid.c b/src/cpu/amd/family_10h-family_15h/fidvid.c +index 99ffcc8..2e26645 100644 +--- a/src/cpu/amd/family_10h-family_15h/fidvid.c ++++ b/src/cpu/amd/family_10h-family_15h/fidvid.c +@@ -44,7 +44,7 @@ Fam10 Bios and Kernel Development Guide #31116, rev 3.48, April 22, 2010 + + 3.- 2.4.2.7 dualPlaneOnly(dev) + +-4.- 2.4.2.8 applyBoostFIDOffset(dev) ++4.- 2.4.2.8 applyBoostFIDOffset(dev, nodeid) + + 5.- enableNbPState1(dev) + +@@ -143,25 +143,33 @@ static void enable_fid_change(u8 fid) + } + } + +-static void applyBoostFIDOffset( device_t dev ) { +- // BKDG 2.4.2.8 +- // revision E only, but E is apparently not supported yet, therefore untested +- if ((cpuid_edx(0x80000007) & CPB_MASK) +- && ((cpuid_ecx(0x80000008) & NC_MASK) ==5) ) { +- u32 core = get_node_core_id_x().coreid; +- u32 asymetricBoostThisCore = ((pci_read_config32(dev, 0x10C) >> (core*2))) & 3; +- msr_t msr = rdmsr(PS_REG_BASE); +- u32 cpuFid = msr.lo & PS_CPU_FID_MASK; +- cpuFid = cpuFid + asymetricBoostThisCore; +- msr.lo &= ~PS_CPU_FID_MASK; +- msr.lo |= cpuFid ; +- wrmsr(PS_REG_BASE , msr); +- +- } ++static void applyBoostFIDOffset(device_t dev, uint32_t nodeid) { ++ // BKDG 2.4.2.8 ++ // Fam10h revision E only, but E is apparently not supported yet, therefore untested ++ if ((cpuid_edx(0x80000007) & CPB_MASK) ++ && ((cpuid_ecx(0x80000008) & NC_MASK) == 5) ) { ++ u32 core = get_node_core_id_x().coreid; ++ u32 asymetricBoostThisCore = ((pci_read_config32(dev, 0x10C) >> (core*2))) & 3; ++ msr_t msr = rdmsr(PS_REG_BASE); ++ u32 cpuFid = msr.lo & PS_CPU_FID_MASK; ++ cpuFid = cpuFid + asymetricBoostThisCore; ++ msr.lo &= ~PS_CPU_FID_MASK; ++ msr.lo |= cpuFid ; ++ wrmsr(PS_REG_BASE , msr); ++ } else if (is_fam15h()) { ++ uint32_t dword = pci_read_config32(NODE_PCI(nodeid, 4), 0x15c); ++ uint8_t boost_count = (dword >> 2) & 0x7; ++ if (boost_count > 0) { ++ /* Enable boost */ ++ dword &= ~0x3; ++ dword |= 0x1; ++ pci_write_config32(NODE_PCI(nodeid, 4), 0x15c, dword); ++ } ++ } + } + + static void enableNbPState1( device_t dev ) { +- u32 cpuRev = mctGetLogicalCPUID(0xFF); ++ uint64_t cpuRev = mctGetLogicalCPUID(0xFF); + if (cpuRev & AMD_FAM10_C3) { + u32 nbPState = (pci_read_config32(dev, 0x1F0) & NB_PSTATE_MASK); + if ( nbPState){ +@@ -203,7 +211,7 @@ static u8 setPStateMaxVal( device_t dev ) { + static void dualPlaneOnly( device_t dev ) { + // BKDG 2.4.2.7 + +- u32 cpuRev = mctGetLogicalCPUID(0xFF); ++ uint64_t cpuRev = mctGetLogicalCPUID(0xFF); + if ((mctGetProcessorPackageType() == AMD_PKGTYPE_AM3_2r2) + && (cpuRev & AMD_DR_Cx)) { // should be rev C or rev E but there's no constant for E + if ( (pci_read_config32(dev, 0x1FC) & DUAL_PLANE_ONLY_MASK) +@@ -283,12 +291,16 @@ static void recalculateVsSlamTimeSettingOnCorePre(device_t dev) + */ + + /* Determine if this is a PVI or SVI system */ +- dtemp = pci_read_config32(dev, 0xA0); +- +- if (dtemp & PVI_MODE) +- pviModeFlag = 1; +- else ++ if 
(is_fam15h()) { + pviModeFlag = 0; ++ } else { ++ dtemp = pci_read_config32(dev, 0xa0); ++ ++ if (dtemp & PVI_MODE) ++ pviModeFlag = 1; ++ else ++ pviModeFlag = 0; ++ } + + /* Get P0's voltage */ + /* MSRC001_00[68:64] are not programmed yet when called from +@@ -515,59 +527,67 @@ static void config_nb_syn_ptr_adj(device_t dev, u32 cpuRev) { + } + + static void config_acpi_pwr_state_ctrl_regs(device_t dev, u32 cpuRev, u8 procPkg) { +- /* step 1, chapter 2.4.2.6 of AMD Fam 10 BKDG #31116 Rev 3.48 22.4.2010 */ +- u32 dword; +- u32 c1= 1; +- if (cpuRev & (AMD_DR_Bx)) { +- // will coreboot ever enable cache scrubbing ? +- // if it does, will it be enough to check the current state +- // or should we configure for what we'll set up later ? +- dword = pci_read_config32(dev, 0x58); +- u32 scrubbingCache = dword & +- ( (0x1F << 16) // DCacheScrub +- | (0x1F << 8) ); // L2Scrub +- if (scrubbingCache) { +- c1 = 0x80; +- } else { +- c1 = 0xA0; +- } +- } else { // rev C or later +- // same doubt as cache scrubbing: ok to check current state ? +- dword = pci_read_config32(dev, 0xDC); +- u32 cacheFlushOnHalt = dword & (7 << 16); +- if (!cacheFlushOnHalt) { +- c1 = 0x80; +- } +- } +- dword = (c1 << 24) | (0xE641E6); +- pci_write_config32(dev, 0x84, dword); +- +- +- /* FIXME: BKDG Table 100 says if the link is at a Gen1 +-frequency and the chipset does not support a 10us minimum LDTSTOP +-assertion time, then { If ASB2 && SVI then smaf001 = F6h else +-smaf001=87h. } else ... I hardly know what it means or how to check +-it from here, so I bluntly assume it is false and code here the else, +-which is easier */ +- +- u32 smaf001 = 0xE6; +- if (cpuRev & AMD_DR_Bx ) { +- smaf001 = 0xA6; +- } else { +- #if CONFIG_SVI_HIGH_FREQ +- if (cpuRev & (AMD_RB_C3 | AMD_DA_C3)) { +- smaf001 = 0xF6; +- } +- #endif +- } +- u32 fidvidChange = 0; +- if (((cpuRev & AMD_DA_Cx) && (procPkg & AMD_PKGTYPE_S1gX)) +- || (cpuRev & AMD_RB_C3) ) { +- fidvidChange=0x0B; +- } +- dword = (0xE6 << 24) | (fidvidChange << 16) +- | (smaf001 << 8) | 0x81; +- pci_write_config32(dev, 0x80, dword); ++ if (is_fam15h()) { ++ /* Family 15h BKDG Rev. 3.14 D18F3x80 recommended settings */ ++ pci_write_config32(dev, 0x80, 0xe20be281); ++ ++ /* Family 15h BKDG Rev. 3.14 D18F3x84 recommended settings */ ++ pci_write_config32(dev, 0x84, 0x01e200e2); ++ } else { ++ /* step 1, chapter 2.4.2.6 of AMD Fam 10 BKDG #31116 Rev 3.48 22.4.2010 */ ++ u32 dword; ++ u32 c1= 1; ++ if (cpuRev & (AMD_DR_Bx)) { ++ // will coreboot ever enable cache scrubbing ? ++ // if it does, will it be enough to check the current state ++ // or should we configure for what we'll set up later ? ++ dword = pci_read_config32(dev, 0x58); ++ u32 scrubbingCache = dword & ++ ( (0x1F << 16) // DCacheScrub ++ | (0x1F << 8) ); // L2Scrub ++ if (scrubbingCache) { ++ c1 = 0x80; ++ } else { ++ c1 = 0xA0; ++ } ++ } else { // rev C or later ++ // same doubt as cache scrubbing: ok to check current state ? ++ dword = pci_read_config32(dev, 0xDC); ++ u32 cacheFlushOnHalt = dword & (7 << 16); ++ if (!cacheFlushOnHalt) { ++ c1 = 0x80; ++ } ++ } ++ dword = (c1 << 24) | (0xE641E6); ++ pci_write_config32(dev, 0x84, dword); ++ ++ /* FIXME: BKDG Table 100 says if the link is at a Gen1 ++ * frequency and the chipset does not support a 10us minimum LDTSTOP ++ * assertion time, then { If ASB2 && SVI then smaf001 = F6h else ++ * smaf001=87h. } else ... 
I hardly know what it means or how to check ++ * it from here, so I bluntly assume it is false and code here the else, ++ * which is easier ++ */ ++ ++ u32 smaf001 = 0xE6; ++ if (cpuRev & AMD_DR_Bx ) { ++ smaf001 = 0xA6; ++ } else { ++ #if CONFIG_SVI_HIGH_FREQ ++ if (cpuRev & (AMD_RB_C3 | AMD_DA_C3)) { ++ smaf001 = 0xF6; ++ } ++ #endif ++ } ++ u32 fidvidChange = 0; ++ if (((cpuRev & AMD_DA_Cx) && (procPkg & AMD_PKGTYPE_S1gX)) ++ || (cpuRev & AMD_RB_C3) ) { ++ fidvidChange=0x0B; ++ } ++ dword = (0xE6 << 24) | (fidvidChange << 16) ++ | (smaf001 << 8) | 0x81; ++ pci_write_config32(dev, 0x80, dword); ++ } + } + + static void prep_fid_change(void) +@@ -584,7 +604,7 @@ static void prep_fid_change(void) + for (i = 0; i < nodes; i++) { + printk(BIOS_DEBUG, "Prep FID/VID Node:%02x\n", i); + dev = NODE_PCI(i, 3); +- u32 cpuRev = mctGetLogicalCPUID(0xFF) ; ++ uint64_t cpuRev = mctGetLogicalCPUID(0xFF) ; + u8 procPkg = mctGetProcessorPackageType(); + + setVSRamp(dev); +@@ -612,7 +632,7 @@ static void prep_fid_change(void) + } + } + +-static void waitCurrentPstate(u32 target_pstate){ ++static void waitCurrentPstate(u32 target_pstate) { + msr_t initial_msr = rdmsr(TSC_MSR); + msr_t pstate_msr = rdmsr(CUR_PSTATE_MSR); + msr_t tsc_msr; +@@ -645,7 +665,7 @@ static void waitCurrentPstate(u32 target_pstate){ + + if (pstate_msr.lo != target_pstate) { + msr_t limit_msr = rdmsr(0xc0010061); +- printk(BIOS_ERR, "*** Time out waiting for P-state %01x. Current P-state %01x P-state current limit MSRC001_0061=%02x\n", target_pstate, pstate_msr.lo, limit_msr.lo); ++ printk(BIOS_ERR, "*** Time out waiting for P-state %01x. Current P-state %01x P-state current limit MSRC001_0061=%08x %08x\n", target_pstate, pstate_msr.lo, limit_msr.hi, limit_msr.lo); + + do { // should we just go on instead ? + pstate_msr = rdmsr(CUR_PSTATE_MSR); +@@ -655,6 +675,7 @@ static void waitCurrentPstate(u32 target_pstate){ + + static void set_pstate(u32 nonBoostedPState) { + msr_t msr; ++ uint8_t skip_wait; + + // Transition P0 for calling core. + msr = rdmsr(0xC0010062); +@@ -662,12 +683,21 @@ static void set_pstate(u32 nonBoostedPState) { + msr.lo = nonBoostedPState; + wrmsr(0xC0010062, msr); + +- /* Wait for P0 to set. */ +- waitCurrentPstate(nonBoostedPState); +-} +- +- ++ if (is_fam15h()) { ++ /* Do not wait for the first (even) set of cores to transition on Family 15h systems */ ++ if ((cpuid_ebx(0x00000001) & 0x01000000)) ++ skip_wait = 0; ++ else ++ skip_wait = 1; ++ } else { ++ skip_wait = 0; ++ } + ++ if (!skip_wait) { ++ /* Wait for core to transition to P0 */ ++ waitCurrentPstate(nonBoostedPState); ++ } ++} + + static void UpdateSinglePlaneNbVid(void) + { +@@ -757,11 +787,14 @@ static u32 needs_NB_COF_VID_update(void) + u8 nodes; + u8 i; + ++ if (is_fam15h()) ++ return 0; ++ + /* If any node has nb_cof_vid_update set all nodes need an update. */ + nodes = get_nodes(); + nb_cof_vid_update = 0; + for (i = 0; i < nodes; i++) { +- u32 cpuRev = mctGetLogicalCPUID(i) ; ++ uint64_t cpuRev = mctGetLogicalCPUID(i); + u32 nbCofVidUpdateDefined = (cpuRev & (AMD_FAM10_LT_D)); + if (nbCofVidUpdateDefined + && (pci_read_config32(NODE_PCI(i, 3), 0x1FC) +@@ -785,9 +818,11 @@ static u32 init_fidvid_core(u32 nodeid, u32 coreid) + /* Steps 1-6 of BIOS NB COF and VID Configuration + * for SVI and Single-Plane PVI Systems. 
BKDG 2.4.2.9 #31116 rev 3.48 + */ +- + dev = NODE_PCI(nodeid, 3); +- pvimode = pci_read_config32(dev, PW_CTL_MISC) & PVI_MODE; ++ if (is_fam15h()) ++ pvimode = 0; ++ else ++ pvimode = pci_read_config32(dev, PW_CTL_MISC) & PVI_MODE; + reg1fc = pci_read_config32(dev, 0x1FC); + + if (nb_cof_vid_update) { +@@ -799,7 +834,7 @@ static u32 init_fidvid_core(u32 nodeid, u32 coreid) + fid_max = fid_max + ((reg1fc & DUAL_PLANE_NB_FID_OFF_MASK ) >> DUAL_PLANE_NB_FID_SHIFT ); + } + /* write newNbVid to P-state Reg's NbVid always if NbVidUpdatedAll=1 */ +- fixPsNbVidBeforeWR(vid_max, coreid,dev,pvimode); ++ fixPsNbVidBeforeWR(vid_max, coreid, dev, pvimode); + + /* fid setup is handled by the BSP at the end. */ + +@@ -819,7 +854,7 @@ static void init_fidvid_ap(u32 apicid, u32 nodeid, u32 coreid) + + printk(BIOS_DEBUG, "FIDVID on AP: %02x\n", apicid); + +- send = init_fidvid_core(nodeid,coreid); ++ send = init_fidvid_core(nodeid, coreid); + send |= (apicid << 24); // ap apicid + + // Send signal to BSP about this AP max fid +@@ -861,7 +896,7 @@ static void init_fidvid_bsp_stage1(u32 ap_apicid, void *gp) + while (--loop > 0) { + if (lapic_remote_read(ap_apicid, LAPIC_MSG_REG, &readback) != 0) + continue; +- if ((readback & 0x3f) == 1) { ++ if (((readback & 0x3f) == 1) || ((readback & 0x3f) == F10_APSTATE_ASLEEP)) { + timeout = 0; + break; /* target ap is in stage 1 */ + } +@@ -949,7 +984,10 @@ static void init_fidvid_stage2(u32 apicid, u32 nodeid) + /* If any node has nb_cof_vid_update set all nodes need an update. */ + + dev = NODE_PCI(nodeid, 3); +- pvimode = (pci_read_config32(dev, 0xA0) >> 8) & 1; ++ if (is_fam15h()) ++ pvimode = 0; ++ else ++ pvimode = (pci_read_config32(dev, 0xA0) >> 8) & 1; + reg1fc = pci_read_config32(dev, 0x1FC); + nbvid = (reg1fc >> 7) & 0x7F; + NbVidUpdateAll = (reg1fc >> 1) & 1; +@@ -970,15 +1008,17 @@ static void init_fidvid_stage2(u32 apicid, u32 nodeid) + pci_write_config32(dev, 0xA0, dtemp); + + dualPlaneOnly(dev); +- applyBoostFIDOffset(dev); ++ applyBoostFIDOffset(dev, nodeid); + enableNbPState1(dev); + + finalPstateChange(); + +- /* Set TSC to tick at the P0 ndfid rate */ +- msr = rdmsr(HWCR); +- msr.lo |= 1 << 24; +- wrmsr(HWCR, msr); ++ if (!is_fam15h()) { ++ /* Set TSC to tick at the P0 ndfid rate */ ++ msr = rdmsr(HWCR); ++ msr.lo |= 1 << 24; ++ wrmsr(HWCR, msr); ++ } + } + + +@@ -1012,8 +1052,7 @@ static int init_fidvid_bsp(u32 bsp_apicid, u32 nodes) + /* Steps 1-6 of BIOS NB COF and VID Configuration + * for SVI and Single-Plane PVI Systems. 
+ */ +- +- fv.common_fid = init_fidvid_core(0,0); ++ fv.common_fid = init_fidvid_core(0, 0); + + print_debug_fv("BSP fid = ", fv.common_fid); + +diff --git a/src/cpu/amd/family_10h-family_15h/init_cpus.c b/src/cpu/amd/family_10h-family_15h/init_cpus.c +index 8de6d25..aced850 100644 +--- a/src/cpu/amd/family_10h-family_15h/init_cpus.c ++++ b/src/cpu/amd/family_10h-family_15h/init_cpus.c +@@ -30,9 +30,12 @@ + #include <northbridge/amd/amdfam10/raminit_amdmct.c> + #include <reset.h> + ++#if IS_ENABLED(CONFIG_SET_FIDVID) + static void prep_fid_change(void); + static void init_fidvid_stage2(u32 apicid, u32 nodeid); +-void cpuSetAMDMSR(void); ++#endif ++ ++void cpuSetAMDMSR(uint8_t node_id); + + #if CONFIG_PCI_IO_CFG_EXT + static void set_EnableCf8ExtCfg(void) +@@ -51,43 +54,38 @@ static void set_EnableCf8ExtCfg(void) { } + + typedef void (*process_ap_t) (u32 apicid, void *gp); + +-//core_range = 0 : all cores +-//core range = 1 : core 0 only +-//core range = 2 : cores other than core0 ++uint32_t get_boot_apic_id(uint8_t node, uint32_t core) { ++ uint32_t ap_apicid; + +-static void for_each_ap(u32 bsp_apicid, u32 core_range, process_ap_t process_ap, +- void *gp) +-{ +- // here assume the OS don't change our apicid +- u32 ap_apicid; ++ uint32_t nb_cfg_54; ++ uint32_t siblings; ++ uint32_t cores_found; + +- u32 nodes; +- u32 siblings; +- u32 disable_siblings; +- u32 cores_found; +- u32 nb_cfg_54; +- int i, j; +- u32 ApicIdCoreIdSize; ++ uint8_t fam15h = 0; + uint8_t rev_gte_d = 0; + uint8_t dual_node = 0; + uint32_t f3xe8; ++ uint32_t family; ++ uint32_t model; + +- /* get_nodes define in ht_wrapper.c */ +- nodes = get_nodes(); +- +- if (!CONFIG_LOGICAL_CPUS || +- read_option(multi_core, 0) != 0) { // 0 means multi core +- disable_siblings = 1; +- } else { +- disable_siblings = 0; +- } ++ uint32_t ApicIdCoreIdSize; + + /* Assume that all node are same stepping, otherwise we can use use + nb_cfg_54 from bsp for all nodes */ + nb_cfg_54 = read_nb_cfg_54(); + f3xe8 = pci_read_config32(NODE_PCI(0, 3), 0xe8); + +- if (cpuid_eax(0x80000001) >= 0x8) ++ family = model = cpuid_eax(0x80000001); ++ model = ((model & 0xf0000) >> 12) | ((model & 0xf0) >> 4); ++ family = ((family & 0xf00000) >> 16) | ((family & 0xf00) >> 8); ++ ++ if (family >= 0x6f) { ++ /* Family 15h or later */ ++ fam15h = 1; ++ nb_cfg_54 = 1; ++ } ++ ++ if ((model >= 0x8) || fam15h) + /* Revision D or later */ + rev_gte_d = 1; + +@@ -103,10 +101,63 @@ static void for_each_ap(u32 bsp_apicid, u32 core_range, process_ap_t process_ap, + siblings = 3; //quad core + } + ++ cores_found = get_core_num_in_bsp(node); ++ if (siblings > cores_found) ++ siblings = cores_found; ++ ++ if (dual_node) { ++ ap_apicid = 0; ++ if (fam15h) { ++ ap_apicid |= ((node >> 1) & 0x3) << 5; /* Node ID */ ++ ap_apicid |= ((node & 0x1) * (siblings + 1)) + core; /* Core ID */ ++ } else { ++ if (nb_cfg_54) { ++ ap_apicid |= ((node >> 1) & 0x3) << 4; /* Node ID */ ++ ap_apicid |= ((node & 0x1) * (siblings + 1)) + core; /* Core ID */ ++ } else { ++ ap_apicid |= node & 0x3; /* Node ID */ ++ ap_apicid |= (((node & 0x1) * (siblings + 1)) + core) << 4; /* Core ID */ ++ } ++ } ++ } else { ++ if (fam15h) { ++ ap_apicid = (node * (siblings + 1)) + core; ++ } else { ++ ap_apicid = node * (nb_cfg_54 ? (siblings + 1) : 1) + ++ core * (nb_cfg_54 ? 
1 : 64); ++ } ++ } ++ ++ return ap_apicid; ++} ++ ++//core_range = 0 : all cores ++//core range = 1 : core 0 only ++//core range = 2 : cores other than core0 ++ ++static void for_each_ap(u32 bsp_apicid, u32 core_range, process_ap_t process_ap, ++ void *gp) ++{ ++ // here assume the OS don't change our apicid ++ u32 ap_apicid; ++ ++ u32 nodes; ++ u32 disable_siblings; ++ u32 cores_found; ++ int i, j; ++ ++ /* get_nodes define in ht_wrapper.c */ ++ nodes = get_nodes(); ++ ++ if (!CONFIG_LOGICAL_CPUS || ++ read_option(multi_core, 0) != 0) { // 0 means multi core ++ disable_siblings = 1; ++ } else { ++ disable_siblings = 0; ++ } ++ + for (i = 0; i < nodes; i++) { + cores_found = get_core_num_in_bsp(i); +- if (siblings > cores_found) +- siblings = cores_found; + + u32 jstart, jend; + +@@ -123,21 +174,7 @@ static void for_each_ap(u32 bsp_apicid, u32 core_range, process_ap_t process_ap, + } + + for (j = jstart; j <= jend; j++) { +- if (dual_node) { +- ap_apicid = 0; +- if (nb_cfg_54) { +- ap_apicid |= ((i >> 1) & 0x3) << 4; /* Node ID */ +- ap_apicid |= ((i & 0x1) * (siblings + 1)) + j; /* Core ID */ +- } else { +- ap_apicid |= i & 0x3; /* Node ID */ +- ap_apicid |= (((i & 0x1) * (siblings + 1)) + j) << 4; /* Core ID */ +- } +- } else { +- ap_apicid = +- i * (nb_cfg_54 ? (siblings + 1) : 1) + +- j * (nb_cfg_54 ? 1 : 64); +- } +- ++ ap_apicid = get_boot_apic_id(i, j); + + #if CONFIG_ENABLE_APIC_EXT_ID && (CONFIG_APIC_ID_OFFSET > 0) + #if !CONFIG_LIFT_BSP_APIC_ID +@@ -197,7 +234,7 @@ void print_apicid_nodeid_coreid(u32 apicid, struct node_core_id id, + apicid, id.nodeid, id.coreid); + } + +-static u32 wait_cpu_state(u32 apicid, u32 state) ++uint32_t wait_cpu_state(uint32_t apicid, uint32_t state, uint32_t state2) + { + u32 readback = 0; + u32 timeout = 1; +@@ -205,7 +242,7 @@ static u32 wait_cpu_state(u32 apicid, u32 state) + while (--loop > 0) { + if (lapic_remote_read(apicid, LAPIC_MSG_REG, &readback) != 0) + continue; +- if ((readback & 0x3f) == state || (readback & 0x3f) == F10_APSTATE_RESET) { ++ if ((readback & 0x3f) == state || (readback & 0x3f) == state2 || (readback & 0x3f) == F10_APSTATE_RESET) { + timeout = 0; + break; //target cpu is in stage started + } +@@ -222,7 +259,7 @@ static u32 wait_cpu_state(u32 apicid, u32 state) + static void wait_ap_started(u32 ap_apicid, void *gp) + { + u32 timeout; +- timeout = wait_cpu_state(ap_apicid, F10_APSTATE_STARTED); ++ timeout = wait_cpu_state(ap_apicid, F10_APSTATE_STARTED, F10_APSTATE_ASLEEP); + printk(BIOS_DEBUG, "* AP %02x", ap_apicid); + if (timeout) { + printk(BIOS_DEBUG, " timed out:%08x\n", timeout); +@@ -258,16 +295,27 @@ static void enable_apic_ext_id(u32 node) + pci_write_config32(NODE_HT(node), 0x68, val); + } + +-static void STOP_CAR_AND_CPU(void) ++static void STOP_CAR_AND_CPU(uint8_t skip_sharedc_config, uint32_t apicid) + { + msr_t msr; ++ uint32_t family; ++ ++ family = amd_fam1x_cpu_family(); // inline ++ ++ if (family < 0x6f) { ++ /* Family 10h or earlier */ ++ ++ /* Disable L2 IC to L3 connection (Only for CAR) */ ++ msr = rdmsr(BU_CFG2); ++ msr.lo &= ~(1 << ClLinesToNbDis); ++ wrmsr(BU_CFG2, msr); ++ } + +- /* Disable L2 IC to L3 connection (Only for CAR) */ +- msr = rdmsr(BU_CFG2); +- msr.lo &= ~(1 << ClLinesToNbDis); +- wrmsr(BU_CFG2, msr); ++ disable_cache_as_ram(skip_sharedc_config); // inline ++ ++ /* Mark the core as sleeping */ ++ lapic_write(LAPIC_MSG_REG, (apicid << 24) | F10_APSTATE_ASLEEP); + +- disable_cache_as_ram(); // inline + /* stop all cores except node0/core0 the bsp .... 
*/ + stop_this_cpu(); + } +@@ -276,6 +324,7 @@ static u32 init_cpus(u32 cpu_init_detectedx, struct sys_info *sysinfo) + { + u32 bsp_apicid = 0; + u32 apicid; ++ uint8_t set_mtrrs; + struct node_core_id id; + + /* Please refer to the calculations and explaination in cache_as_ram.inc before modifying these values */ +@@ -362,7 +411,7 @@ static u32 init_cpus(u32 cpu_init_detectedx, struct sys_info *sysinfo) + */ + update_microcode(cpuid_eax(1)); + +- cpuSetAMDMSR(); ++ cpuSetAMDMSR(id.nodeid); + + #if CONFIG_SET_FIDVID + #if CONFIG_LOGICAL_CPUS && CONFIG_SET_FIDVID_CORE0_ONLY +@@ -385,10 +434,29 @@ static u32 init_cpus(u32 cpu_init_detectedx, struct sys_info *sysinfo) + } + #endif + ++ if (is_fam15h()) { ++ /* core 1 on node 0 is special; to avoid corrupting the ++ * BSP do not alter MTRRs on that core */ ++ if (apicid == 1) ++ set_mtrrs = 0; ++ else ++ set_mtrrs = !!(apicid & 0x1); ++ } else { ++ set_mtrrs = 1; ++ } ++ + /* AP is ready, configure MTRRs and go to sleep */ +- set_var_mtrr(0, 0x00000000, CONFIG_RAMTOP, MTRR_TYPE_WRBACK); ++ if (set_mtrrs) ++ set_var_mtrr(0, 0x00000000, CONFIG_RAMTOP, MTRR_TYPE_WRBACK); + +- STOP_CAR_AND_CPU(); ++ printk(BIOS_DEBUG, "Disabling CAR on AP %02x\n", apicid); ++ if (is_fam15h()) { ++ /* Only modify the MSRs on the odd cores (the last cores to finish booting) */ ++ STOP_CAR_AND_CPU(!set_mtrrs, apicid); ++ } else { ++ /* Modify MSRs on all cores */ ++ STOP_CAR_AND_CPU(0, apicid); ++ } + + printk(BIOS_DEBUG, + "\nAP %02x should be halted but you are reading this....\n", +@@ -496,7 +564,7 @@ static void setup_remote_node(u8 node) + } + #endif /* CONFIG_MAX_PHYSICAL_CPUS > 1 */ + +-static void AMD_Errata281(u8 node, u32 revision, u32 platform) ++static void AMD_Errata281(u8 node, uint64_t revision, u32 platform) + { + /* Workaround for Transaction Scheduling Conflict in + * Northbridge Cross Bar. Implement XCS Token adjustment +@@ -794,7 +862,7 @@ static void AMD_SetHtPhyRegister(u8 node, u8 link, u8 entry) + } while (!(val & HTPHY_IS_COMPLETE_MASK)); + } + +-void cpuSetAMDMSR(void) ++void cpuSetAMDMSR(uint8_t node_id) + { + /* This routine loads the CPU with default settings in fam10_msr_default + * table . 
It must be run after Cache-As-RAM has been enabled, and +@@ -804,7 +872,8 @@ void cpuSetAMDMSR(void) + */ + msr_t msr; + u8 i; +- u32 revision, platform; ++ u32 platform; ++ uint64_t revision; + + printk(BIOS_DEBUG, "cpuSetAMDMSR "); + +@@ -824,6 +893,49 @@ void cpuSetAMDMSR(void) + } + AMD_Errata298(); + ++ if (revision & AMD_FAM15_ALL) { ++ uint32_t f5x80; ++ uint8_t enabled; ++ uint8_t compute_unit_count = 0; ++ f5x80 = pci_read_config32(NODE_PCI(node_id, 5), 0x80); ++ enabled = f5x80 & 0xf; ++ if (enabled == 0x1) ++ compute_unit_count = 1; ++ if (enabled == 0x3) ++ compute_unit_count = 2; ++ if (enabled == 0x7) ++ compute_unit_count = 3; ++ if (enabled == 0xf) ++ compute_unit_count = 4; ++ msr = rdmsr(BU_CFG2); ++ msr.lo &= ~(0x3 << 6); /* ThrottleNbInterface[1:0] */ ++ msr.lo |= (((compute_unit_count - 1) & 0x3) << 6); ++ wrmsr(BU_CFG2, msr); ++ } ++ ++ /* Revision C0 and above */ ++ if (revision & AMD_OR_C0) { ++ uint32_t f3x1fc = pci_read_config32(NODE_PCI(node_id, 3), 0x1fc); ++ msr = rdmsr(FP_CFG); ++ msr.hi &= ~(0x7 << (42-32)); /* DiDtCfg4 */ ++ msr.hi |= (((f3x1fc >> 17) & 0x7) << (42-32)); ++ msr.hi &= ~(0x1 << (41-32)); /* DiDtCfg5 */ ++ msr.hi |= (((f3x1fc >> 22) & 0x1) << (41-32)); ++ msr.hi &= ~(0x1 << (40-32)); /* DiDtCfg3 */ ++ msr.hi |= (((f3x1fc >> 16) & 0x1) << (40-32)); ++ msr.hi &= ~(0x7 << (32-32)); /* DiDtCfg1 (1) */ ++ msr.hi |= (((f3x1fc >> 11) & 0x7) << (32-32)); ++ msr.lo &= ~(0x1f << 27); /* DiDtCfg1 (2) */ ++ msr.lo |= (((f3x1fc >> 6) & 0x1f) << 27); ++ msr.lo &= ~(0x3 << 25); /* DiDtCfg2 */ ++ msr.lo |= (((f3x1fc >> 14) & 0x3) << 25); ++ msr.lo &= ~(0x1f << 18); /* DiDtCfg0 */ ++ msr.lo |= (((f3x1fc >> 1) & 0x1f) << 18); ++ msr.lo &= ~(0x1 << 16); /* DiDtMode */ ++ msr.lo |= ((f3x1fc & 0x1) << 16); ++ wrmsr(FP_CFG, msr); ++ } ++ + printk(BIOS_DEBUG, " done\n"); + } + +@@ -835,9 +947,10 @@ static void cpuSetAMDPCI(u8 node) + * that it is run for the first core on each node + */ + u8 i, j; +- u32 revision, platform; ++ u32 platform; + u32 val; + u8 offset; ++ uint64_t revision; + + printk(BIOS_DEBUG, "cpuSetAMDPCI %02d", node); + +@@ -899,6 +1012,7 @@ static void cpuSetAMDPCI(u8 node) + } + + #ifdef UNUSED_CODE ++/* Clearing the MCA registers is apparently handled in the ramstage CPU Function 3 driver */ + static void cpuInitializeMCA(void) + { + /* Clears Machine Check Architecture (MCA) registers, which power on +diff --git a/src/cpu/amd/family_10h-family_15h/model_10xxx_init.c b/src/cpu/amd/family_10h-family_15h/model_10xxx_init.c +index b942c1a..8a61f13 100644 +--- a/src/cpu/amd/family_10h-family_15h/model_10xxx_init.c ++++ b/src/cpu/amd/family_10h-family_15h/model_10xxx_init.c +@@ -39,6 +39,23 @@ + + #define MCI_STATUS 0x401 + ++static inline uint8_t is_fam15h(void) ++{ ++ uint8_t fam15h = 0; ++ uint32_t family; ++ ++ family = cpuid_eax(0x80000001); ++ family = ((family & 0xf00000) >> 16) | ((family & 0xf00) >> 8); ++ ++ if (family >= 0x6f) ++ /* Family 15h or later */ ++ fam15h = 1; ++ ++ return fam15h; ++} ++ ++static volatile uint8_t fam15h_startup_flags[MAX_NODES_SUPPORTED][MAX_CORES_SUPPORTED] = {{ 0 }}; ++ + static void model_10xxx_init(device_t dev) + { + u8 i; +@@ -47,13 +64,44 @@ static void model_10xxx_init(device_t dev) + #if CONFIG_LOGICAL_CPUS + u32 siblings; + #endif ++ uint8_t delay_start; + + id = get_node_core_id(read_nb_cfg_54()); /* nb_cfg_54 can not be set */ + printk(BIOS_DEBUG, "nodeid = %02d, coreid = %02d\n", id.nodeid, id.coreid); + ++ if (is_fam15h()) ++ delay_start = !!(id.coreid & 0x1); ++ else ++ delay_start = 0; ++ + /* Turn 
on caching if we haven't already */ + x86_enable_cache(); +- amd_setup_mtrrs(); ++ ++ if (!delay_start) { ++ /* Initialize all variable MTRRs except the first pair. ++ * This prevents Linux from having to correct an inconsistent ++ * MTRR setup, which would crash Family 15h CPUs due to the ++ * compute unit structure sharing MTRR MSRs between AP cores. ++ */ ++ msr.hi = 0x00000000; ++ msr.lo = 0x00000000; ++ ++ disable_cache(); ++ ++ for (i = 0x2; i < 0x10; i++) { ++ wrmsr(0x00000200 | i, msr); ++ } ++ ++ enable_cache(); ++ ++ /* Set up other MTRRs */ ++ amd_setup_mtrrs(); ++ } else { ++ while (!fam15h_startup_flags[id.nodeid][id.coreid - 1]) { ++ /* Wait for CU first core startup */ ++ } ++ } ++ + x86_mtrr_check(); + + disable_cache(); +@@ -88,17 +136,24 @@ static void model_10xxx_init(device_t dev) + printk(BIOS_DEBUG, "siblings = %02d, ", siblings); + #endif + +- /* DisableCf8ExtCfg */ ++ /* Disable Cf8ExtCfg */ + msr = rdmsr(NB_CFG_MSR); + msr.hi &= ~(1 << (46 - 32)); + wrmsr(NB_CFG_MSR, msr); + +- msr = rdmsr(BU_CFG2_MSR); +- /* Clear ClLinesToNbDis */ +- msr.lo &= ~(1 << 15); +- /* Clear bit 35 as per Erratum 343 */ +- msr.hi &= ~(1 << (35-32)); +- wrmsr(BU_CFG2_MSR, msr); ++ if (is_fam15h()) { ++ msr = rdmsr(BU_CFG3_MSR); ++ /* Set CombineCr0Cd */ ++ msr.hi |= (1 << (49-32)); ++ wrmsr(BU_CFG3_MSR, msr); ++ } else { ++ msr = rdmsr(BU_CFG2_MSR); ++ /* Clear ClLinesToNbDis */ ++ msr.lo &= ~(1 << 15); ++ /* Clear bit 35 as per Erratum 343 */ ++ msr.hi &= ~(1 << (35-32)); ++ wrmsr(BU_CFG2_MSR, msr); ++ } + + if (IS_ENABLED(CONFIG_HAVE_SMI_HANDLER)) { + printk(BIOS_DEBUG, "Initializing SMM ASeg memory\n"); +@@ -131,6 +186,7 @@ static void model_10xxx_init(device_t dev) + msr.lo |= (1 << 0); + wrmsr(HWCR_MSR, msr); + ++ fam15h_startup_flags[id.nodeid][id.coreid] = 1; + } + + static struct device_operations cpu_dev_ops = { +@@ -147,15 +203,17 @@ static struct cpu_device_id cpu_table[] = { + { X86_VENDOR_AMD, 0x100f22 }, + { X86_VENDOR_AMD, 0x100f23 }, + { X86_VENDOR_AMD, 0x100f40 }, /* RB-C0 */ +- { X86_VENDOR_AMD, 0x100F42 }, /* RB-C2 */ +- { X86_VENDOR_AMD, 0x100F43 }, /* RB-C3 */ +- { X86_VENDOR_AMD, 0x100F52 }, /* BL-C2 */ +- { X86_VENDOR_AMD, 0x100F62 }, /* DA-C2 */ +- { X86_VENDOR_AMD, 0x100F63 }, /* DA-C3 */ +- { X86_VENDOR_AMD, 0x100F80 }, /* HY-D0 */ +- { X86_VENDOR_AMD, 0x100F81 }, /* HY-D1 */ +- { X86_VENDOR_AMD, 0x100F91 }, /* HY-D1 */ +- { X86_VENDOR_AMD, 0x100FA0 }, /* PH-E0 */ ++ { X86_VENDOR_AMD, 0x100f42 }, /* RB-C2 */ ++ { X86_VENDOR_AMD, 0x100f43 }, /* RB-C3 */ ++ { X86_VENDOR_AMD, 0x100f52 }, /* BL-C2 */ ++ { X86_VENDOR_AMD, 0x100f62 }, /* DA-C2 */ ++ { X86_VENDOR_AMD, 0x100f63 }, /* DA-C3 */ ++ { X86_VENDOR_AMD, 0x100f80 }, /* HY-D0 */ ++ { X86_VENDOR_AMD, 0x100f81 }, /* HY-D1 */ ++ { X86_VENDOR_AMD, 0x100f91 }, /* HY-D1 */ ++ { X86_VENDOR_AMD, 0x100fa0 }, /* PH-E0 */ ++ { X86_VENDOR_AMD, 0x600f12 }, /* OR-B2 */ ++ { X86_VENDOR_AMD, 0x600f20 }, /* OR-C0 */ + { 0, 0 }, + }; + +diff --git a/src/cpu/amd/family_10h-family_15h/powernow_acpi.c b/src/cpu/amd/family_10h-family_15h/powernow_acpi.c +index 98ef08a..84e5514 100644 +--- a/src/cpu/amd/family_10h-family_15h/powernow_acpi.c ++++ b/src/cpu/amd/family_10h-family_15h/powernow_acpi.c +@@ -74,8 +74,7 @@ static void write_pstates_for_core(u8 pstate_num, u16 *pstate_feq, u32 *pstate_p + /* Revision C or greater single-link processor */ + cpuid1 = cpuid(0x80000008); + acpigen_write_PSD_package(0, (cpuid1.ecx & 0xff) + 1, SW_ALL); +- } +- else { ++ } else { + /* Find the local APIC ID for the specified core ID */ + struct 
device* cpu; + int cpu_index = 0; +@@ -99,7 +98,9 @@ static void write_pstates_for_core(u8 pstate_num, u16 *pstate_feq, u32 *pstate_p + } + + /* +-* For details of this algorithm, please refer to the BDKG 3.62 page 69 ++* For details of this algorithm, please refer to: ++* Family 10h BDKG 3.62 page 69 ++* Family 15h BDKG 3.14 page 74 + * + * WARNING: The core count algorithm below assumes that all processors + * are identical, with the same number of active cores. While the BKDG +@@ -149,6 +150,13 @@ void amd_generate_powernow(u32 pcontrol_blk, u8 plen, u8 onlyBSP) + uint8_t node_count; + uint8_t cores_per_node; + uint8_t total_core_count; ++ uint8_t fam15h; ++ uint8_t fam10h_rev_e = 0; ++ ++ /* Detect Revision E processors via method used in fidvid.c */ ++ if ((cpuid_edx(0x80000007) & CPB_MASK) ++ && ((cpuid_ecx(0x80000008) & NC_MASK) == 5)) ++ fam10h_rev_e = 1; + + /* + * Based on the CPU socket type,cmp_cap and pwr_lmt , get the power limit. +@@ -156,11 +164,17 @@ void amd_generate_powernow(u32 pcontrol_blk, u8 plen, u8 onlyBSP) + * cmp_cap : 0x0 SingleCore ; 0x1 DualCore ; 0x2 TripleCore ; 0x3 QuadCore ; 0x4 QuintupleCore ; 0x5 HexCore + */ + printk(BIOS_INFO, "Pstates algorithm ...\n"); ++ fam15h = !!(mctGetLogicalCPUID(0) & AMD_FAM15_ALL); + /* Get number of cores */ +- dtemp = pci_read_config32(dev_find_slot(0, PCI_DEVFN(0x18, 3)), 0xE8); +- cmp_cap = (dtemp & 0x3000) >> 12; +- if (mctGetLogicalCPUID(0) & AMD_FAM10_REV_D) /* revision D */ +- cmp_cap |= (dtemp & 0x8000) >> 13; ++ if (fam15h) { ++ cmp_cap = pci_read_config32(dev_find_slot(0, PCI_DEVFN(0x18, 5)), 0x84) & 0xff; ++ } else { ++ dtemp = pci_read_config32(dev_find_slot(0, PCI_DEVFN(0x18, 3)), 0xe8); ++ cmp_cap = (dtemp & 0x3000) >> 12; ++ if (mctGetLogicalCPUID(0) & (AMD_FAM10_REV_D | AMD_FAM15_ALL)) /* revision D or higher */ ++ cmp_cap |= (dtemp & 0x8000) >> 13; ++ } ++ + /* Get number of nodes */ + dtemp = pci_read_config32(dev_find_slot(0, PCI_DEVFN(0x18, 0)), 0x60); + node_count = ((dtemp & 0x70) >> 4) + 1; +@@ -169,6 +183,14 @@ void amd_generate_powernow(u32 pcontrol_blk, u8 plen, u8 onlyBSP) + /* Compute total number of cores installed in system */ + total_core_count = cores_per_node * node_count; + ++ /* Get number of boost states */ ++ uint8_t boost_count = 0; ++ dtemp = pci_read_config32(dev_find_slot(0, PCI_DEVFN(0x18, 4)), 0x15c); ++ if (fam10h_rev_e) ++ boost_count = (dtemp >> 2) & 0x1; ++ else if (mctGetLogicalCPUID(0) & AMD_FAM15_ALL) ++ boost_count = (dtemp >> 2) & 0x7; ++ + Pstate_num = 0; + + /* See if the CPUID(0x80000007) returned EDX[7]==1b */ +@@ -205,7 +227,7 @@ void amd_generate_powernow(u32 pcontrol_blk, u8 plen, u8 onlyBSP) + + /* Get PSmax's index */ + msr = rdmsr(0xC0010061); +- Pstate_max = (uint8_t) ((msr.lo >> PS_MAX_VAL_SHFT) & BIT_MASK_3); ++ Pstate_max = (uint8_t) ((msr.lo >> PS_MAX_VAL_SHFT) & ((fam15h)?BIT_MASK_7:BIT_MASK_3)); + + /* Determine if all enabled Pstates have the same fidvid */ + uint8_t i; +@@ -219,10 +241,14 @@ void amd_generate_powernow(u32 pcontrol_blk, u8 plen, u8 onlyBSP) + } + } + ++ /* Family 15h uses slightly different PSmax numbering */ ++ if (fam15h) ++ Pstate_max++; ++ + /* Populate tables with all Pstate information */ + for (Pstate_num = 0; Pstate_num < Pstate_max; Pstate_num++) { + /* Get power state information */ +- msr = rdmsr(0xC0010064 + Pstate_num); ++ msr = rdmsr(0xC0010064 + Pstate_num + boost_count); + cpufid = (msr.lo & 0x3f); + cpudid = (msr.lo & 0x1c0) >> 6; + cpuvid = (msr.lo & 0xfe00) >> 9; +@@ -232,12 +258,10 @@ void amd_generate_powernow(u32 
pcontrol_blk, u8 plen, u8 onlyBSP) + if (pviModeFlag) { + if (cpuvid >= 0x20) { + core_voltage = 7625 - (((cpuvid - 0x20) * 10000) / 80); +- } +- else { ++ } else { + core_voltage = 15500 - ((cpuvid * 10000) / 40); + } +- } +- else { ++ } else { + cpuvid = cpuvid & 0x7f; + if (cpuvid >= 0x7c) + core_voltage = 0; +diff --git a/src/cpu/amd/family_10h-family_15h/processor_name.c b/src/cpu/amd/family_10h-family_15h/processor_name.c +index 12c45c9..fbd0452 100644 +--- a/src/cpu/amd/family_10h-family_15h/processor_name.c ++++ b/src/cpu/amd/family_10h-family_15h/processor_name.c +@@ -33,6 +33,10 @@ + #include <cpu/amd/mtrr.h> + #include <cpu/cpu.h> + #include <cpu/amd/model_10xxx_rev.h> ++#include <device/device.h> ++#include <device/pci.h> ++#include <device/pnp.h> ++#include <device/pci_ops.h> + + /* The maximum length of CPU names is 48 bytes, including the final NULL byte. + * If you change these names your BIOS will _NOT_ pass the AMD validation and +@@ -212,104 +216,138 @@ static int strcpymax(char *dst, const char *src, int buflen) + return i; + } + ++#define NAME_STRING_MAXLEN 48 + + int init_processor_name(void) + { +- /* variable names taken from fam10 revision guide for clarity */ +- u32 BrandId; /* CPUID Fn8000_0001_EBX */ +- u8 String1; /* BrandID[14:11] */ +- u8 String2; /* BrandID[3:0] */ +- u8 Model; /* BrandID[10:4] */ +- u8 Pg; /* BrandID[15] */ +- u8 PkgTyp; /* BrandID[31:28] */ +- u8 NC; /* CPUID Fn8000_0008_ECX */ +- const char *processor_name_string = unknown; +- char program_string[48]; +- u32 *p_program_string = (u32 *)program_string; + msr_t msr; +- int i, j = 0, str2_checkNC = 1; +- const struct str_s *str, *str2; ++ ssize_t i; ++ char program_string[NAME_STRING_MAXLEN]; ++ u32 *p_program_string = (u32 *)program_string; ++ uint8_t fam15h = 0; ++ uint32_t family; + ++ family = cpuid_eax(0x80000001); ++ family = ((family & 0xf00000) >> 16) | ((family & 0xf00) >> 8); + +- /* Find out which CPU brand it is */ +- BrandId = cpuid_ebx(0x80000001); +- String1 = (u8)((BrandId >> 11) & 0x0F); +- String2 = (u8)((BrandId >> 0) & 0x0F); +- Model = (u8)((BrandId >> 4) & 0x7F); +- Pg = (u8)((BrandId >> 15) & 0x01); +- PkgTyp = (u8)((BrandId >> 28) & 0x0F); +- NC = (u8)(cpuid_ecx(0x80000008) & 0xFF); ++ if (family >= 0x6f) ++ /* Family 15h or later */ ++ fam15h = 1; + + /* null the string */ + memset(program_string, 0, sizeof(program_string)); + +- if (!Model) { +- processor_name_string = Pg ? 
thermal : sample; +- goto done; +- } +- +- switch (PkgTyp) { +- case 0: /* F1207 */ +- str = String1_socket_F; +- str2 = String2_socket_F; +- str2_checkNC = 0; +- break; +- case 1: /* AM2 */ +- str = String1_socket_AM2; +- str2 = String2_socket_AM2; +- break; +- case 3: /* G34 */ +- str = String1_socket_G34; +- str2 = String2_socket_G34; +- str2_checkNC = 0; +- break; +- case 5: /* C32 */ +- str = String1_socket_C32; +- str2 = String2_socket_C32; +- break; +- default: +- goto done; +- } ++ if (fam15h) { ++ /* Family 15h or later */ ++ uint32_t dword; ++ device_t cpu_fn5_dev = dev_find_slot(0, PCI_DEVFN(0x18, 5)); ++ pci_write_config32(cpu_fn5_dev, 0x194, 0); ++ dword = pci_read_config32(cpu_fn5_dev, 0x198); ++ if (dword == 0) { ++ strcpymax(program_string, sample, sizeof(program_string)); ++ } else { ++ /* Assemble the string from PCI configuration register contents */ ++ for (i = 0; i < 12; i++) { ++ pci_write_config32(cpu_fn5_dev, 0x194, i); ++ p_program_string[i] = pci_read_config32(cpu_fn5_dev, 0x198); ++ } ++ ++ /* Correctly place the null terminator */ ++ for (i = (NAME_STRING_MAXLEN - 2); i > 0; i--) { ++ if (program_string[i] != 0x20) ++ break; ++ } ++ program_string[i + 1] = 0; ++ } ++ } else { ++ /* variable names taken from fam10 revision guide for clarity */ ++ u32 BrandId; /* CPUID Fn8000_0001_EBX */ ++ u8 String1; /* BrandID[14:11] */ ++ u8 String2; /* BrandID[3:0] */ ++ u8 Model; /* BrandID[10:4] */ ++ u8 Pg; /* BrandID[15] */ ++ u8 PkgTyp; /* BrandID[31:28] */ ++ u8 NC; /* CPUID Fn8000_0008_ECX */ ++ const char *processor_name_string = unknown; ++ int j = 0, str2_checkNC = 1; ++ const struct str_s *str, *str2; ++ ++ /* Find out which CPU brand it is */ ++ BrandId = cpuid_ebx(0x80000001); ++ String1 = (u8)((BrandId >> 11) & 0x0F); ++ String2 = (u8)((BrandId >> 0) & 0x0F); ++ Model = (u8)((BrandId >> 4) & 0x7F); ++ Pg = (u8)((BrandId >> 15) & 0x01); ++ PkgTyp = (u8)((BrandId >> 28) & 0x0F); ++ NC = (u8)(cpuid_ecx(0x80000008) & 0xFF); ++ ++ if (!Model) { ++ processor_name_string = Pg ? thermal : sample; ++ goto done; ++ } + +- /* String1 */ +- for (i = 0; str[i].value; i++) { +- if ((str[i].Pg == Pg) && +- (str[i].NC == NC) && +- (str[i].String == String1)) { +- processor_name_string = str[i].value; ++ switch (PkgTyp) { ++ case 0: /* F1207 */ ++ str = String1_socket_F; ++ str2 = String2_socket_F; ++ str2_checkNC = 0; ++ break; ++ case 1: /* AM2 */ ++ str = String1_socket_AM2; ++ str2 = String2_socket_AM2; ++ break; ++ case 3: /* G34 */ ++ str = String1_socket_G34; ++ str2 = String2_socket_G34; ++ str2_checkNC = 0; ++ break; ++ case 5: /* C32 */ ++ str = String1_socket_C32; ++ str2 = String2_socket_C32; + break; ++ default: ++ goto done; + } +- } + +- if (!str[i].value) +- goto done; ++ /* String1 */ ++ for (i = 0; str[i].value; i++) { ++ if ((str[i].Pg == Pg) && ++ (str[i].NC == NC) && ++ (str[i].String == String1)) { ++ processor_name_string = str[i].value; ++ break; ++ } ++ } + +- j = strcpymax(program_string, processor_name_string, +- sizeof(program_string)); ++ if (!str[i].value) ++ goto done; + +- /* Translate Model from 01-99 to ASCII and put it on the end. 
+- * Numbers less than 10 should include a leading zero, e.g., 09.*/ +- if (Model < 100 && j < sizeof(program_string) - 2) { +- program_string[j++] = (Model / 10) + '0'; +- program_string[j++] = (Model % 10) + '0'; +- } ++ j = strcpymax(program_string, processor_name_string, ++ sizeof(program_string)); + +- processor_name_string = unknown2; +- +- /* String 2 */ +- for(i = 0; str2[i].value; i++) { +- if ((str2[i].Pg == Pg) && +- ((str2[i].NC == NC) || !str2_checkNC) && +- (str2[i].String == String2)) { +- processor_name_string = str2[i].value; +- break; ++ /* Translate Model from 01-99 to ASCII and put it on the end. ++ * Numbers less than 10 should include a leading zero, e.g., 09.*/ ++ if (Model < 100 && j < sizeof(program_string) - 2) { ++ program_string[j++] = (Model / 10) + '0'; ++ program_string[j++] = (Model % 10) + '0'; + } +- } + ++ processor_name_string = unknown2; ++ ++ /* String 2 */ ++ for(i = 0; str2[i].value; i++) { ++ if ((str2[i].Pg == Pg) && ++ ((str2[i].NC == NC) || !str2_checkNC) && ++ (str2[i].String == String2)) { ++ processor_name_string = str2[i].value; ++ break; ++ } ++ } + +-done: +- strcpymax(&program_string[j], processor_name_string, +- sizeof(program_string) - j); ++ done: ++ strcpymax(&program_string[j], processor_name_string, ++ sizeof(program_string) - j); ++ } + + printk(BIOS_DEBUG, "CPU model: %s\n", program_string); + +diff --git a/src/cpu/amd/family_10h-family_15h/update_microcode.c b/src/cpu/amd/family_10h-family_15h/update_microcode.c +index 51aca35..3b2f5dd 100644 +--- a/src/cpu/amd/family_10h-family_15h/update_microcode.c ++++ b/src/cpu/amd/family_10h-family_15h/update_microcode.c +@@ -28,6 +28,7 @@ struct id_mapping { + + static u16 get_equivalent_processor_rev_id(u32 orig_id) { + static const struct id_mapping id_mapping_table[] = { ++ /* Family 10h */ + { 0x100f00, 0x1000 }, + { 0x100f01, 0x1000 }, + { 0x100f02, 0x1000 }, +@@ -42,8 +43,13 @@ static u16 get_equivalent_processor_rev_id(u32 orig_id) { + { 0x100f62, 0x1062 }, /* DA-C2 */ + { 0x100f63, 0x1043 }, /* DA-C3 */ + { 0x100f81, 0x1081 }, /* HY-D1 */ ++ { 0x100f91, 0x1081 }, /* HY-D1 */ + { 0x100fa0, 0x10A0 }, /* PH-E0 */ + ++ /* Family 15h */ ++ { 0x600f12, 0x6012 }, /* OR-B2 */ ++ { 0x600f20, 0x6020 }, /* OR-C0 */ ++ + /* Array terminator */ + { 0xffffff, 0x0000 }, + }; +diff --git a/src/cpu/amd/quadcore/quadcore.c b/src/cpu/amd/quadcore/quadcore.c +index 9c21e94..8a9b5ed 100644 +--- a/src/cpu/amd/quadcore/quadcore.c ++++ b/src/cpu/amd/quadcore/quadcore.c +@@ -2,6 +2,7 @@ + * This file is part of the coreboot project. + * + * Copyright (C) 2007 Advanced Micro Devices, Inc. 
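/*
 * For reference: the ">= 0x6f" test used by the is_fam15h() helpers this patch
 * introduces in several files packs CPUID Fn8000_0001_EAX as
 * (ExtendedFamily[27:20] << 4) | BaseFamily[11:8]. Family 15h parts report
 * BaseFamily = 0xF and ExtendedFamily = 0x6, giving 0x6F; the OR-B2/OR-C0 IDs
 * listed above (0x600f12, 0x600f20) decode the same way. A minimal standalone
 * sketch of that check:
 *
 *	static inline uint8_t is_family_15h_or_later(void)
 *	{
 *		uint32_t eax = cpuid_eax(0x80000001);
 *		uint32_t family = ((eax & 0xf00000) >> 16) | ((eax & 0xf00) >> 8);
 *		return family >= 0x6f;
 *	}
 */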
++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -26,16 +27,41 @@ + + #include "cpu/amd/quadcore/quadcore_id.c" + ++/* get_boot_apic_id and wait_cpu_state located in init_cpus.c */ ++uint32_t get_boot_apic_id(uint8_t node, uint32_t core); ++uint32_t wait_cpu_state(uint32_t apicid, uint32_t state, uint32_t state2); ++ ++static inline uint8_t is_fam15h(void) ++{ ++ uint8_t fam15h = 0; ++ uint32_t family; ++ ++ family = cpuid_eax(0x80000001); ++ family = ((family & 0xf00000) >> 16) | ((family & 0xf00) >> 8); ++ ++ if (family >= 0x6f) ++ /* Family 15h or later */ ++ fam15h = 1; ++ ++ return fam15h; ++} ++ + static u32 get_core_num_in_bsp(u32 nodeid) + { + u32 dword; +- dword = pci_read_config32(NODE_PCI(nodeid, 3), 0xe8); +- dword >>= 12; +- /* Bit 15 is CmpCap[2] since Revision D. */ +- if ((cpuid_ecx(0x80000008) & 0xff) > 3) +- dword = ((dword & 8) >> 1) | (dword & 3); +- else +- dword &= 3; ++ if (is_fam15h()) { ++ /* Family 15h moved CmpCap to F5x84 [7:0] */ ++ dword = pci_read_config32(NODE_PCI(nodeid, 5), 0x84); ++ dword &= 0xff; ++ } else { ++ dword = pci_read_config32(NODE_PCI(nodeid, 3), 0xe8); ++ dword >>= 12; ++ /* Bit 15 is CmpCap[2] since Revision D. */ ++ if ((cpuid_ecx(0x80000008) & 0xff) > 3) ++ dword = ((dword & 8) >> 1) | (dword & 3); ++ else ++ dword &= 3; ++ } + return dword; + } + +@@ -50,28 +76,68 @@ static u8 set_apicid_cpuid_lo(void) + return 1; + } + +-static void real_start_other_core(u32 nodeid, u32 cores) ++static void real_start_other_core(uint32_t nodeid, uint32_t cores) + { +- u32 dword, i; ++ ssize_t i; ++ uint32_t dword; + + printk(BIOS_DEBUG, "Start other core - nodeid: %02x cores: %02x\n", nodeid, cores); + + /* set PCI_DEV(0, 0x18+nodeid, 3), 0x44 bit 27 to redirect all MC4 + accesses and error logging to core0 */ + dword = pci_read_config32(NODE_PCI(nodeid, 3), 0x44); +- dword |= 1 << 27; // NbMcaToMstCpuEn bit ++ dword |= 1 << 30; /* SyncFloodOnDramAdrParErr=1 */ ++ dword |= 1 << 27; /* NbMcaToMstCpuEn=1 */ ++ dword |= 1 << 21; /* SyncFloodOnAnyUcErr=1 */ ++ dword |= 1 << 20; /* SyncFloodOnWDT=1 */ ++ dword |= 1 << 2; /* SyncFloodOnDramUcEcc=1 */ + pci_write_config32(NODE_PCI(nodeid, 3), 0x44, dword); +- // set PCI_DEV(0, 0x18+nodeid, 0), 0x68 bit 5 to start core1 +- dword = pci_read_config32(NODE_PCI(nodeid, 0), 0x68); +- dword |= 1 << 5; +- pci_write_config32(NODE_PCI(nodeid, 0), 0x68, dword); +- +- if(cores > 1) { +- dword = pci_read_config32(NODE_PCI(nodeid, 0), 0x168); +- for (i = 0; i < cores - 1; i++) { +- dword |= 1 << i; ++ if (is_fam15h()) { ++ uint32_t core_activation_flags = 0; ++ uint32_t active_cores = 0; ++ ++ /* Set PCI_DEV(0, 0x18+nodeid, 0), 0x1dc bits 7:1 to start cores */ ++ dword = pci_read_config32(NODE_PCI(nodeid, 0), 0x1dc); ++ for (i = 1; i < cores + 1; i++) { ++ core_activation_flags |= 1 << i; ++ } ++ ++ /* Start the first core of each compute unit */ ++ active_cores |= core_activation_flags & 0x55; ++ pci_write_config32(NODE_PCI(nodeid, 0), 0x1dc, dword | active_cores); ++ ++ /* Each core shares a single set of MTRR registers with ++ * another core in the same compute unit, therefore, it ++ * is important that one core in each CU starts in advance ++ * of the other in order to avoid one core stomping all over ++ * the other core's settings. ++ */ ++ ++ /* Wait for the first core of each compute unit to start... 
*/ ++ uint32_t timeout; ++ for (i = 1; i < cores + 1; i++) { ++ if (!(i & 0x1)) { ++ uint32_t ap_apicid = get_boot_apic_id(nodeid, i); ++ timeout = wait_cpu_state(ap_apicid, F10_APSTATE_ASLEEP, F10_APSTATE_ASLEEP); ++ } ++ } ++ ++ /* Start the second core of each compute unit */ ++ active_cores |= core_activation_flags & 0xaa; ++ pci_write_config32(NODE_PCI(nodeid, 0), 0x1dc, dword | active_cores); ++ } else { ++ // set PCI_DEV(0, 0x18+nodeid, 0), 0x68 bit 5 to start core1 ++ dword = pci_read_config32(NODE_PCI(nodeid, 0), 0x68); ++ dword |= 1 << 5; ++ pci_write_config32(NODE_PCI(nodeid, 0), 0x68, dword); ++ ++ if (cores > 1) { ++ dword = pci_read_config32(NODE_PCI(nodeid, 0), 0x168); ++ for (i = 0; i < cores - 1; i++) { ++ dword |= 1 << i; ++ } ++ pci_write_config32(NODE_PCI(nodeid, 0), 0x168, dword); + } +- pci_write_config32(NODE_PCI(nodeid, 0), 0x168, dword); + } + } + +@@ -91,10 +157,9 @@ static void start_other_cores(void) + + for (nodeid = 0; nodeid < nodes; nodeid++) { + u32 cores = get_core_num_in_bsp(nodeid); +- printk(BIOS_DEBUG, "init node: %02x cores: %02x \n", nodeid, cores); ++ printk(BIOS_DEBUG, "init node: %02x cores: %02x pass 1 \n", nodeid, cores); + if (cores > 0) { + real_start_other_core(nodeid, cores); + } + } +- + } +diff --git a/src/cpu/amd/quadcore/quadcore_id.c b/src/cpu/amd/quadcore/quadcore_id.c +index c5921de..c0537b3 100644 +--- a/src/cpu/amd/quadcore/quadcore_id.c ++++ b/src/cpu/amd/quadcore/quadcore_id.c +@@ -43,9 +43,12 @@ struct node_core_id get_node_core_id(u32 nb_cfg_54) + { + struct node_core_id id; + uint8_t apicid; ++ uint8_t fam15h = 0; + uint8_t rev_gte_d = 0; + uint8_t dual_node = 0; + uint32_t f3xe8; ++ uint32_t family; ++ uint32_t model; + + #ifdef __PRE_RAM__ + f3xe8 = pci_read_config32(NODE_PCI(0, 3), 0xe8); +@@ -53,7 +56,17 @@ struct node_core_id get_node_core_id(u32 nb_cfg_54) + f3xe8 = pci_read_config32(get_node_pci(0, 3), 0xe8); + #endif + +- if (cpuid_eax(0x80000001) >= 0x8) ++ family = model = cpuid_eax(0x80000001); ++ model = ((model & 0xf0000) >> 12) | ((model & 0xf0) >> 4); ++ family = ((family & 0xf00000) >> 16) | ((family & 0xf00) >> 8); ++ ++ if (family >= 0x6f) { ++ /* Family 15h or later */ ++ fam15h = 1; ++ nb_cfg_54 = 1; ++ } ++ ++ if ((model >= 0x8) || fam15h) + /* Revision D or later */ + rev_gte_d = 1; + +@@ -67,7 +80,13 @@ struct node_core_id get_node_core_id(u32 nb_cfg_54) + */ + apicid = (cpuid_ebx(1) >> 24) & 0xff; + if( nb_cfg_54) { +- if (rev_gte_d && dual_node) { ++ if (fam15h && dual_node) { ++ id.coreid = apicid & 0x1f; ++ id.nodeid = (apicid & 0x60) >> 5; ++ } else if (fam15h && !dual_node) { ++ id.coreid = apicid & 0xf; ++ id.nodeid = (apicid & 0x70) >> 4; ++ } else if (rev_gte_d && dual_node) { + id.coreid = apicid & 0xf; + id.nodeid = (apicid & 0x30) >> 4; + } else if (rev_gte_d && !dual_node) { +@@ -90,7 +109,25 @@ struct node_core_id get_node_core_id(u32 nb_cfg_54) + } + } + +- if (rev_gte_d && dual_node) { ++ if (fam15h && dual_node) { ++ /* Coreboot expects each separate processor die to be on a different nodeid. ++ * Since the code above returns nodeid 0 even on internal node 1 some fixup is needed... 
++ */ ++ uint32_t f5x84; ++ uint8_t core_count; ++ ++#ifdef __PRE_RAM__ ++ f5x84 = pci_read_config32(NODE_PCI(0, 5), 0x84); ++#else ++ f5x84 = pci_read_config32(get_node_pci(0, 5), 0x84); ++#endif ++ core_count = (f5x84 & 0xff) + 1; ++ id.nodeid = id.nodeid * 2; ++ if (id.coreid >= core_count) { ++ id.nodeid += 1; ++ id.coreid = id.coreid - core_count; ++ } ++ } else if (rev_gte_d && dual_node) { + /* Coreboot expects each separate processor die to be on a different nodeid. + * Since the code above returns nodeid 0 even on internal node 1 some fixup is needed... + */ +diff --git a/src/include/cpu/amd/model_10xxx_msr.h b/src/include/cpu/amd/model_10xxx_msr.h +index 6c7dece..7d78e2d 100644 +--- a/src/include/cpu/amd/model_10xxx_msr.h ++++ b/src/include/cpu/amd/model_10xxx_msr.h +@@ -2,6 +2,7 @@ + * This file is part of the coreboot project. + * + * Copyright (C) 2007 Advanced Micro Devices, Inc. ++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -32,7 +33,13 @@ + #define IC_CFG_MSR 0xC0011021 + #define DC_CFG_MSR 0xC0011022 + #define BU_CFG_MSR 0xC0011023 ++#define FP_CFG_MSR 0xC0011028 ++#define DE_CFG_MSR 0xC0011029 + #define BU_CFG2_MSR 0xC001102A ++#define BU_CFG3_MSR 0xC001102B ++#define EX_CFG_MSR 0xC001102C ++#define LS_CFG2_MSR 0xC001102D ++#define IBS_OP_DATA3_MSR 0xC0011037 + + #define CPU_ID_FEATURES_MSR 0xC0011004 + #define CPU_ID_HYPER_EXT_FEATURES 0xC001100d +diff --git a/src/mainboard/advansus/a785e-i/romstage.c b/src/mainboard/advansus/a785e-i/romstage.c +index 4c2b38a..ab717fd 100644 +--- a/src/mainboard/advansus/a785e-i/romstage.c ++++ b/src/mainboard/advansus/a785e-i/romstage.c +@@ -131,7 +131,7 @@ void cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx) + + post_code(0x33); + +- cpuSetAMDMSR(); ++ cpuSetAMDMSR(0); + post_code(0x34); + + amd_ht_init(sysinfo); +diff --git a/src/mainboard/amd/bimini_fam10/romstage.c b/src/mainboard/amd/bimini_fam10/romstage.c +index e2bd351..5e2cf82 100644 +--- a/src/mainboard/amd/bimini_fam10/romstage.c ++++ b/src/mainboard/amd/bimini_fam10/romstage.c +@@ -123,7 +123,7 @@ void cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx) + + post_code(0x33); + +- cpuSetAMDMSR(); ++ cpuSetAMDMSR(0); + post_code(0x34); + + amd_ht_init(sysinfo); +diff --git a/src/mainboard/amd/mahogany_fam10/romstage.c b/src/mainboard/amd/mahogany_fam10/romstage.c +index 74bc9d5..025a8bb 100644 +--- a/src/mainboard/amd/mahogany_fam10/romstage.c ++++ b/src/mainboard/amd/mahogany_fam10/romstage.c +@@ -125,7 +125,7 @@ void cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx) + + post_code(0x33); + +- cpuSetAMDMSR(); ++ cpuSetAMDMSR(0); + post_code(0x34); + + amd_ht_init(sysinfo); +diff --git a/src/mainboard/amd/serengeti_cheetah_fam10/romstage.c b/src/mainboard/amd/serengeti_cheetah_fam10/romstage.c +index 20d46e6..5063439 100644 +--- a/src/mainboard/amd/serengeti_cheetah_fam10/romstage.c ++++ b/src/mainboard/amd/serengeti_cheetah_fam10/romstage.c +@@ -231,7 +231,7 @@ void cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx) + + post_code(0x33); + +- cpuSetAMDMSR(); ++ cpuSetAMDMSR(0); + post_code(0x34); + + amd_ht_init(sysinfo); +diff --git a/src/mainboard/amd/tilapia_fam10/romstage.c b/src/mainboard/amd/tilapia_fam10/romstage.c +index 89100b1..e37bc08 100644 +--- 
a/src/mainboard/amd/tilapia_fam10/romstage.c ++++ b/src/mainboard/amd/tilapia_fam10/romstage.c +@@ -125,7 +125,7 @@ void cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx) + + post_code(0x33); + +- cpuSetAMDMSR(); ++ cpuSetAMDMSR(0); + post_code(0x34); + + amd_ht_init(sysinfo); +diff --git a/src/mainboard/asus/kfsn4-dre/romstage.c b/src/mainboard/asus/kfsn4-dre/romstage.c +index 5d1f5a6..dd5c7dc 100644 +--- a/src/mainboard/asus/kfsn4-dre/romstage.c ++++ b/src/mainboard/asus/kfsn4-dre/romstage.c +@@ -245,7 +245,7 @@ void cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx) + + post_code(0x33); + +- cpuSetAMDMSR(); ++ cpuSetAMDMSR(0); + post_code(0x34); + + amd_ht_init(sysinfo); +diff --git a/src/mainboard/asus/m4a78-em/romstage.c b/src/mainboard/asus/m4a78-em/romstage.c +index 82f30d9..82b96bf 100644 +--- a/src/mainboard/asus/m4a78-em/romstage.c ++++ b/src/mainboard/asus/m4a78-em/romstage.c +@@ -127,7 +127,7 @@ void cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx) + + post_code(0x33); + +- cpuSetAMDMSR(); ++ cpuSetAMDMSR(0); + post_code(0x34); + + amd_ht_init(sysinfo); +diff --git a/src/mainboard/asus/m4a785-m/romstage.c b/src/mainboard/asus/m4a785-m/romstage.c +index 780bf81..30975fa 100644 +--- a/src/mainboard/asus/m4a785-m/romstage.c ++++ b/src/mainboard/asus/m4a785-m/romstage.c +@@ -127,7 +127,7 @@ void cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx) + + post_code(0x33); + +- cpuSetAMDMSR(); ++ cpuSetAMDMSR(0); + post_code(0x34); + + amd_ht_init(sysinfo); +diff --git a/src/mainboard/asus/m5a88-v/romstage.c b/src/mainboard/asus/m5a88-v/romstage.c +index 38761a6..4edaba2 100644 +--- a/src/mainboard/asus/m5a88-v/romstage.c ++++ b/src/mainboard/asus/m5a88-v/romstage.c +@@ -128,7 +128,7 @@ void cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx) + + post_code(0x33); + +- cpuSetAMDMSR(); ++ cpuSetAMDMSR(0); + post_code(0x34); + + amd_ht_init(sysinfo); +diff --git a/src/mainboard/avalue/eax-785e/romstage.c b/src/mainboard/avalue/eax-785e/romstage.c +index 764a5c6..447012b 100644 +--- a/src/mainboard/avalue/eax-785e/romstage.c ++++ b/src/mainboard/avalue/eax-785e/romstage.c +@@ -132,7 +132,7 @@ void cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx) + + post_code(0x33); + +- cpuSetAMDMSR(); ++ cpuSetAMDMSR(0); + post_code(0x34); + + amd_ht_init(sysinfo); +diff --git a/src/mainboard/gigabyte/ma785gm/romstage.c b/src/mainboard/gigabyte/ma785gm/romstage.c +index db4e449..444e59d 100644 +--- a/src/mainboard/gigabyte/ma785gm/romstage.c ++++ b/src/mainboard/gigabyte/ma785gm/romstage.c +@@ -122,7 +122,7 @@ void cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx) + + post_code(0x33); + +- cpuSetAMDMSR(); ++ cpuSetAMDMSR(0); + post_code(0x34); + + amd_ht_init(sysinfo); +diff --git a/src/mainboard/gigabyte/ma785gmt/romstage.c b/src/mainboard/gigabyte/ma785gmt/romstage.c +index 4ce7c58..705d7c5 100644 +--- a/src/mainboard/gigabyte/ma785gmt/romstage.c ++++ b/src/mainboard/gigabyte/ma785gmt/romstage.c +@@ -122,7 +122,7 @@ void cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx) + + post_code(0x33); + +- cpuSetAMDMSR(); ++ cpuSetAMDMSR(0); + post_code(0x34); + + amd_ht_init(sysinfo); +diff --git a/src/mainboard/gigabyte/ma78gm/romstage.c b/src/mainboard/gigabyte/ma78gm/romstage.c +index d2a0b95..5d21801 100644 +--- a/src/mainboard/gigabyte/ma78gm/romstage.c ++++ b/src/mainboard/gigabyte/ma78gm/romstage.c +@@ -125,7 +125,7 @@ void 
cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx) + + post_code(0x33); + +- cpuSetAMDMSR(); ++ cpuSetAMDMSR(0); + post_code(0x34); + + amd_ht_init(sysinfo); +diff --git a/src/mainboard/hp/dl165_g6_fam10/romstage.c b/src/mainboard/hp/dl165_g6_fam10/romstage.c +index 97e60d5..26c0bb9 100644 +--- a/src/mainboard/hp/dl165_g6_fam10/romstage.c ++++ b/src/mainboard/hp/dl165_g6_fam10/romstage.c +@@ -137,7 +137,7 @@ void cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx) + + post_code(0x33); + +- cpuSetAMDMSR(); ++ cpuSetAMDMSR(0); + post_code(0x34); + + amd_ht_init(sysinfo); +diff --git a/src/mainboard/iei/kino-780am2-fam10/romstage.c b/src/mainboard/iei/kino-780am2-fam10/romstage.c +index edbae3a..321eea6 100644 +--- a/src/mainboard/iei/kino-780am2-fam10/romstage.c ++++ b/src/mainboard/iei/kino-780am2-fam10/romstage.c +@@ -125,7 +125,7 @@ void cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx) + + post_code(0x33); + +- cpuSetAMDMSR(); ++ cpuSetAMDMSR(0); + post_code(0x34); + + amd_ht_init(sysinfo); +diff --git a/src/mainboard/jetway/pa78vm5/romstage.c b/src/mainboard/jetway/pa78vm5/romstage.c +index 16bb089..93dd2ce 100644 +--- a/src/mainboard/jetway/pa78vm5/romstage.c ++++ b/src/mainboard/jetway/pa78vm5/romstage.c +@@ -130,7 +130,7 @@ void cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx) + + post_code(0x33); + +- cpuSetAMDMSR(); ++ cpuSetAMDMSR(0); + post_code(0x34); + + amd_ht_init(sysinfo); +diff --git a/src/mainboard/msi/ms9652_fam10/romstage.c b/src/mainboard/msi/ms9652_fam10/romstage.c +index 4ea3306..5da971f 100644 +--- a/src/mainboard/msi/ms9652_fam10/romstage.c ++++ b/src/mainboard/msi/ms9652_fam10/romstage.c +@@ -150,7 +150,7 @@ void cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx) + + post_code(0x33); + +- cpuSetAMDMSR(); ++ cpuSetAMDMSR(0); + post_code(0x34); + + amd_ht_init(sysinfo); +diff --git a/src/mainboard/supermicro/h8dmr_fam10/romstage.c b/src/mainboard/supermicro/h8dmr_fam10/romstage.c +index c224dbc..1425546 100644 +--- a/src/mainboard/supermicro/h8dmr_fam10/romstage.c ++++ b/src/mainboard/supermicro/h8dmr_fam10/romstage.c +@@ -146,7 +146,7 @@ void cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx) + + post_code(0x33); + +- cpuSetAMDMSR(); ++ cpuSetAMDMSR(0); + post_code(0x34); + + amd_ht_init(sysinfo); +diff --git a/src/mainboard/supermicro/h8qme_fam10/romstage.c b/src/mainboard/supermicro/h8qme_fam10/romstage.c +index 0f9445b..4721eba 100644 +--- a/src/mainboard/supermicro/h8qme_fam10/romstage.c ++++ b/src/mainboard/supermicro/h8qme_fam10/romstage.c +@@ -214,7 +214,7 @@ void cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx) + + post_code(0x33); + +- cpuSetAMDMSR(); ++ cpuSetAMDMSR(0); + post_code(0x34); + + amd_ht_init(sysinfo); +diff --git a/src/mainboard/supermicro/h8scm_fam10/romstage.c b/src/mainboard/supermicro/h8scm_fam10/romstage.c +index 4ea14fe..858aca0 100644 +--- a/src/mainboard/supermicro/h8scm_fam10/romstage.c ++++ b/src/mainboard/supermicro/h8scm_fam10/romstage.c +@@ -136,7 +136,7 @@ void cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx) + + post_code(0x33); + +- cpuSetAMDMSR(); ++ cpuSetAMDMSR(0); + post_code(0x34); + + /* TODO: The Kernel must support 12 processor, otherwise the interrupt +diff --git a/src/mainboard/tyan/s2912_fam10/romstage.c b/src/mainboard/tyan/s2912_fam10/romstage.c +index 0030619..cdf51b1 100644 +--- a/src/mainboard/tyan/s2912_fam10/romstage.c ++++ 
b/src/mainboard/tyan/s2912_fam10/romstage.c +@@ -149,7 +149,7 @@ void cache_as_ram_main(unsigned long bist, unsigned long cpu_init_detectedx) + + post_code(0x33); + +- cpuSetAMDMSR(); ++ cpuSetAMDMSR(0); + post_code(0x34); + + amd_ht_init(sysinfo); +diff --git a/src/northbridge/amd/amdfam10/Kconfig b/src/northbridge/amd/amdfam10/Kconfig +index ada5b9f..cb0d109 100644 +--- a/src/northbridge/amd/amdfam10/Kconfig ++++ b/src/northbridge/amd/amdfam10/Kconfig +@@ -96,7 +96,7 @@ endif + if HAVE_ACPI_RESUME + config S3_DATA_SIZE + int +- default 16384 ++ default 32768 + endif + + if DIMM_DDR2 +diff --git a/src/northbridge/amd/amdfam10/Makefile.inc b/src/northbridge/amd/amdfam10/Makefile.inc +index b4097b4..4098dce 100644 +--- a/src/northbridge/amd/amdfam10/Makefile.inc ++++ b/src/northbridge/amd/amdfam10/Makefile.inc +@@ -2,6 +2,8 @@ ifeq ($(CONFIG_NORTHBRIDGE_AMD_AMDFAM10),y) + + ramstage-y += northbridge.c + ramstage-y += misc_control.c ++ramstage-y += link_control.c ++ramstage-y += nb_control.c + romstage-y += amdfam10_util.c + ramstage-y += amdfam10_util.c + +diff --git a/src/northbridge/amd/amdfam10/amdfam10.h b/src/northbridge/amd/amdfam10/amdfam10.h +index a1e08a0..b724394 100644 +--- a/src/northbridge/amd/amdfam10/amdfam10.h ++++ b/src/northbridge/amd/amdfam10/amdfam10.h +@@ -962,9 +962,12 @@ that are corresponding to 0x01, 0x02, 0x03, 0x05, 0x06, 0x07 + + #define LAPIC_MSG_REG 0x380 + #define F10_APSTATE_STARTED 0x13 // start of AP execution +-#define F10_APSTATE_STOPPED 0x14 // allow AP to stop ++#define F10_APSTATE_ASLEEP 0x14 // AP sleeping ++#define F10_APSTATE_STOPPED 0x15 // allow AP to stop + #define F10_APSTATE_RESET 0x01 // waiting for warm reset + ++#define MAX_CORES_SUPPORTED 128 ++ + #include "nums.h" + + #ifdef __PRE_RAM__ +@@ -1038,7 +1041,6 @@ struct sys_info { + + struct MCTStatStruc MCTstat; + struct DCTStatStruc DCTstatA[NODE_NUMS]; +- + } __attribute__((packed)); + + #ifdef __PRE_RAM__ +diff --git a/src/northbridge/amd/amdfam10/amdfam10_util.c b/src/northbridge/amd/amdfam10/amdfam10_util.c +index 423bb73..a4045bd 100644 +--- a/src/northbridge/amd/amdfam10/amdfam10_util.c ++++ b/src/northbridge/amd/amdfam10/amdfam10_util.c +@@ -34,14 +34,14 @@ u32 Get_NB32(u32 dev, u32 reg) + } + #endif + +-u32 mctGetLogicalCPUID(u32 Node) ++uint64_t mctGetLogicalCPUID(u32 Node) + { + /* Converts the CPUID to a logical ID MASK that is used to check + CPU version support versions */ + u32 dev; + u32 val, valx; + u32 family, model, stepping; +- u32 ret; ++ uint64_t ret; + + if (Node == 0xFF) { /* current node */ + val = cpuid_eax(0x80000001); +@@ -100,9 +100,16 @@ u32 mctGetLogicalCPUID(u32 Node) + case 0x100a0: + ret = AMD_PH_E0; + break; ++ case 0x15012: ++ case 0x1501f: ++ ret = AMD_OR_B2; ++ break; ++ case 0x15020: ++ ret = AMD_OR_C0; ++ break; + default: + /* FIXME: mabe we should die() here. */ +- printk(BIOS_ERR, "FIXME! CPU Version unknown or not supported! \n"); ++ printk(BIOS_ERR, "FIXME! CPU Version unknown or not supported! %08x\n", valx); + ret = 0; + } + +diff --git a/src/northbridge/amd/amdfam10/link_control.c b/src/northbridge/amd/amdfam10/link_control.c +new file mode 100644 +index 0000000..1091ef4 +--- /dev/null ++++ b/src/northbridge/amd/amdfam10/link_control.c +@@ -0,0 +1,86 @@ ++/* ++ * This file is part of the coreboot project. 
++ * ++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; version 2 of the License. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* Configure various power control registers, including processor ++ * boost support. ++ */ ++ ++#include <console/console.h> ++#include <device/device.h> ++#include <device/pci.h> ++#include <device/pci_ids.h> ++#include <device/pci_ops.h> ++#include <pc80/mc146818rtc.h> ++#include <lib.h> ++#include <cpu/amd/model_10xxx_rev.h> ++ ++#include "amdfam10.h" ++ ++static inline uint8_t is_fam15h(void) ++{ ++ uint8_t fam15h = 0; ++ uint32_t family; ++ ++ family = cpuid_eax(0x80000001); ++ family = ((family & 0xf00000) >> 16) | ((family & 0xf00) >> 8); ++ ++ if (family >= 0x6f) ++ /* Family 15h or later */ ++ fam15h = 1; ++ ++ return fam15h; ++} ++ ++static void nb_control_init(struct device *dev) ++{ ++ uint32_t dword; ++ ++ printk(BIOS_DEBUG, "NB: Function 4 Link Control.. "); ++ ++ if (is_fam15h()) { ++ /* Enable APM */ ++ dword = pci_read_config32(dev, 0x15c); ++ dword |= (0x1 << 7); /* ApmMasterEn = 1 */ ++ pci_write_config32(dev, 0x15c, dword); ++ } ++ ++ printk(BIOS_DEBUG, "done.\n"); ++} ++ ++ ++static struct device_operations mcf4_ops = { ++ .read_resources = pci_dev_read_resources, ++ .set_resources = pci_dev_set_resources, ++ .enable_resources = pci_dev_enable_resources, ++ .init = nb_control_init, ++ .scan_bus = 0, ++ .ops_pci = 0, ++}; ++ ++static const struct pci_driver mcf4_driver_fam10 __pci_driver = { ++ .ops = &mcf4_ops, ++ .vendor = PCI_VENDOR_ID_AMD, ++ .device = 0x1204, ++}; ++ ++static const struct pci_driver mcf4_driver_fam15 __pci_driver = { ++ .ops = &mcf4_ops, ++ .vendor = PCI_VENDOR_ID_AMD, ++ .device = 0x1604, ++}; +\ No newline at end of file +diff --git a/src/northbridge/amd/amdfam10/misc_control.c b/src/northbridge/amd/amdfam10/misc_control.c +index 90a4db1..8777e8f 100644 +--- a/src/northbridge/amd/amdfam10/misc_control.c ++++ b/src/northbridge/amd/amdfam10/misc_control.c +@@ -4,6 +4,7 @@ + * Copyright (C) 2003 by Eric Biederman + * Copyright (C) Stefan Reinauer + * Copyright (C) 2007 Advanced Micro Devices, Inc. 
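/*
 * For reference, the northbridge function PCI device IDs bound by the drivers in
 * this patch: Family 10h uses 0x1200 (function 0, HT configuration), 0x1203
 * (function 3, miscellaneous control) and 0x1204 (function 4, link control);
 * the Family 15h parts supported here use 0x1600, 0x1603 and 0x1604 for the same
 * functions, and add 0x1605 for the new function 5 northbridge control device
 * handled by nb_control.c below.
 */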
++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -152,3 +153,9 @@ static const struct pci_driver mcf3_driver __pci_driver = { + .vendor = PCI_VENDOR_ID_AMD, + .device = 0x1203, + }; ++ ++static const struct pci_driver mcf3_driver_fam15 __pci_driver = { ++ .ops = &mcf3_ops, ++ .vendor = PCI_VENDOR_ID_AMD, ++ .device = 0x1603, ++}; +diff --git a/src/northbridge/amd/amdfam10/nb_control.c b/src/northbridge/amd/amdfam10/nb_control.c +new file mode 100644 +index 0000000..f95b6f8 +--- /dev/null ++++ b/src/northbridge/amd/amdfam10/nb_control.c +@@ -0,0 +1,85 @@ ++/* ++ * This file is part of the coreboot project. ++ * ++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; version 2 of the License. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* Configure various power control registers, including processor boost ++ * and TDP monitoring support. ++ */ ++ ++#include <console/console.h> ++#include <device/device.h> ++#include <device/pci.h> ++#include <device/pci_ids.h> ++#include <device/pci_ops.h> ++#include <pc80/mc146818rtc.h> ++#include <lib.h> ++#include <cpu/amd/model_10xxx_rev.h> ++ ++#include "amdfam10.h" ++ ++static void nb_control_init(struct device *dev) ++{ ++ uint32_t dword; ++ uint32_t f5x80; ++ uint8_t cu_enabled; ++ uint8_t compute_unit_count = 0; ++ ++ printk(BIOS_DEBUG, "NB: Function 5 Northbridge Control.. 
"); ++ ++ /* Determine the number of active compute units on this node */ ++ f5x80 = pci_read_config32(dev, 0x80); ++ cu_enabled = f5x80 & 0xf; ++ if (cu_enabled == 0x1) ++ compute_unit_count = 1; ++ if (cu_enabled == 0x3) ++ compute_unit_count = 2; ++ if (cu_enabled == 0x7) ++ compute_unit_count = 3; ++ if (cu_enabled == 0xf) ++ compute_unit_count = 4; ++ ++ /* Configure Processor TDP Running Average */ ++ dword = pci_read_config32(dev, 0xe0); ++ dword &= ~0xf; /* RunAvgRange = 0x9 */ ++ dword |= 0x9; ++ pci_write_config32(dev, 0xe0, dword); ++ ++ /* Configure northbridge P-states */ ++ dword = pci_read_config32(dev, 0xe0); ++ dword &= ~(0x7 << 9); /* NbPstateThreshold = compute_unit_count */ ++ dword |= (compute_unit_count & 0x7) << 9; ++ pci_write_config32(dev, 0xe0, dword); ++ ++ printk(BIOS_DEBUG, "done.\n"); ++} ++ ++ ++static struct device_operations mcf5_ops = { ++ .read_resources = pci_dev_read_resources, ++ .set_resources = pci_dev_set_resources, ++ .enable_resources = pci_dev_enable_resources, ++ .init = nb_control_init, ++ .scan_bus = 0, ++ .ops_pci = 0, ++}; ++ ++static const struct pci_driver mcf5_driver_fam15 __pci_driver = { ++ .ops = &mcf5_ops, ++ .vendor = PCI_VENDOR_ID_AMD, ++ .device = 0x1605, ++}; +\ No newline at end of file +diff --git a/src/northbridge/amd/amdfam10/northbridge.c b/src/northbridge/amd/amdfam10/northbridge.c +index adcfdf0..baf77d6 100644 +--- a/src/northbridge/amd/amdfam10/northbridge.c ++++ b/src/northbridge/amd/amdfam10/northbridge.c +@@ -81,6 +81,21 @@ device_t get_node_pci(u32 nodeid, u32 fn) + #endif + } + ++static inline uint8_t is_fam15h(void) ++{ ++ uint8_t fam15h = 0; ++ uint32_t family; ++ ++ family = cpuid_eax(0x80000001); ++ family = ((family & 0xf00000) >> 16) | ((family & 0xf00) >> 8); ++ ++ if (family >= 0x6f) ++ /* Family 15h or later */ ++ fam15h = 1; ++ ++ return fam15h; ++} ++ + static void get_fx_devs(void) + { + int i; +@@ -202,7 +217,7 @@ static void amd_g34_fixup(struct bus *link, device_t dev) + /* Revision D or later */ + rev_gte_d = 1; + +- if (rev_gte_d) { ++ if (rev_gte_d || is_fam15h()) { + f3xe8 = pci_read_config32(get_node_pci(0, 3), 0xe8); + + /* Check for dual node capability */ +@@ -215,6 +230,15 @@ static void amd_g34_fixup(struct bus *link, device_t dev) + */ + f3xe8 = pci_read_config32(get_node_pci(nodeid, 3), 0xe8); + uint8_t internal_node_number = ((f3xe8 & 0xc0000000) >> 30); ++ uint8_t defective_link_number_1; ++ uint8_t defective_link_number_2; ++ if (is_fam15h()) { ++ defective_link_number_1 = 4; /* Link 0 Sublink 1 */ ++ defective_link_number_2 = 7; /* Link 3 Sublink 1 */ ++ } else { ++ defective_link_number_1 = 6; /* Link 2 Sublink 1 */ ++ defective_link_number_2 = 5; /* Link 1 Sublink 1 */ ++ } + if (internal_node_number == 0) { + /* Node 0 */ + if (link->link_num == 6) /* Link 2 Sublink 1 */ +@@ -314,6 +338,46 @@ static void amdfam10_scan_chains(device_t dev) + { + struct bus *link; + ++#if CONFIG_CPU_AMD_SOCKET_G34_NON_AGESA ++ if (is_fam15h()) { ++ uint8_t current_link_number = 0; ++ ++ for (link = dev->link_list; link; link = link->next) { ++ /* The following links have changed position in Fam15h G34 processors: ++ * Fam10 Fam15 ++ * Node 0 ++ * L3 --> L1 ++ * L0 --> L3 ++ * L1 --> L2 ++ * L2 --> L0 ++ * Node 1 ++ * L0 --> L0 ++ * L1 --> L3 ++ * L2 --> L1 ++ * L3 --> L2 ++ */ ++ if (link->link_num == 0) ++ link->link_num = 3; ++ else if (link->link_num == 1) ++ link->link_num = 2; ++ else if (link->link_num == 2) ++ link->link_num = 0; ++ else if (link->link_num == 3) ++ link->link_num = 1; ++ else if 
(link->link_num == 5) ++ link->link_num = 7; ++ else if (link->link_num == 6) ++ link->link_num = 5; ++ else if (link->link_num == 7) ++ link->link_num = 6; ++ ++ current_link_number++; ++ if (current_link_number > 3) ++ current_link_number = 0; ++ } ++ } ++#endif ++ + /* Do sb ht chain at first, in case s2885 put sb chain (8131/8111) on link2, but put 8151 on link0 */ + trim_ht_chain(dev); + +@@ -620,13 +684,21 @@ static const struct pci_driver mcf0_driver __pci_driver = { + .device = 0x1200, + }; + ++ + static void amdfam10_nb_init(void *chip_info) + { + relocate_sb_ht_chain(); + } + ++static const struct pci_driver mcf0_driver_fam15 __pci_driver = { ++ .ops = &northbridge_operations, ++ .vendor = PCI_VENDOR_ID_AMD, ++ .device = 0x1600, ++}; ++ ++ + struct chip_operations northbridge_amd_amdfam10_ops = { +- CHIP_NAME("AMD FAM10 Northbridge") ++ CHIP_NAME("AMD Family 10h/15h Northbridge") + .enable_dev = 0, + .init = amdfam10_nb_init, + }; +@@ -950,38 +1022,61 @@ static int amdfam10_get_smbios_data16(int* count, int handle, unsigned long *cur + + static uint16_t amdmct_mct_speed_enum_to_mhz(uint8_t speed) + { +- if (IS_ENABLED(CONFIG_DIMM_DDR2)) { +- switch (speed) { +- case 1: +- return 200; +- case 2: +- return 266; +- case 3: +- return 333; +- case 4: +- return 400; +- case 5: +- return 533; +- default: +- return 0; +- } +- } else if (IS_ENABLED(CONFIG_DIMM_DDR3)) { +- switch (speed) { +- case 3: +- return 333; +- case 4: +- return 400; +- case 5: +- return 533; +- case 6: +- return 667; +- case 7: +- return 800; +- default: +- return 0; ++ if (is_fam15h()) { ++ if (IS_ENABLED(CONFIG_DIMM_DDR3)) { ++ switch (speed) { ++ case 0x4: ++ return 333; ++ case 0x6: ++ return 400; ++ case 0xa: ++ return 533; ++ case 0xe: ++ return 667; ++ case 0x12: ++ return 800; ++ case 0x16: ++ return 933; ++ default: ++ return 0; ++ } ++ } else { ++ return 0; + } + } else { +- return 0; ++ if (IS_ENABLED(CONFIG_DIMM_DDR2)) { ++ switch (speed) { ++ case 1: ++ return 200; ++ case 2: ++ return 266; ++ case 3: ++ return 333; ++ case 4: ++ return 400; ++ case 5: ++ return 533; ++ default: ++ return 0; ++ } ++ } else if (IS_ENABLED(CONFIG_DIMM_DDR3)) { ++ switch (speed) { ++ case 3: ++ return 333; ++ case 4: ++ return 400; ++ case 5: ++ return 533; ++ case 6: ++ return 667; ++ case 7: ++ return 800; ++ default: ++ return 0; ++ } ++ } else { ++ return 0; ++ } + } + } + +@@ -1076,6 +1171,8 @@ static int amdfam10_get_smbios_data17(int* count, int handle, int parent_handle, + #if IS_ENABLED(CONFIG_DIMM_DDR3) + /* Find the maximum and minimum supported voltages */ + uint8_t supported_voltages = mem_info->dct_stat[node].DimmSupportedVoltages[slot]; ++ uint8_t configured_voltage = mem_info->dct_stat[node].DimmConfiguredVoltage[slot]; ++ + if (supported_voltages & 0x8) + t->minimum_voltage = 1150; + else if (supported_voltages & 0x4) +@@ -1094,7 +1191,14 @@ static int amdfam10_get_smbios_data17(int* count, int handle, int parent_handle, + else if (supported_voltages & 0x8) + t->maximum_voltage = 1150; + +- t->configured_voltage = mem_info->dct_stat[node].DimmConfiguredVoltage[slot]; ++ if (configured_voltage & 0x8) ++ t->configured_voltage = 1150; ++ else if (configured_voltage & 0x4) ++ t->configured_voltage = 1250; ++ else if (configured_voltage & 0x2) ++ t->configured_voltage = 1350; ++ else if (configured_voltage & 0x1) ++ t->configured_voltage = 1500; + #endif + } + t->memory_error_information_handle = 0xFFFE; /* no error information handle available */ +@@ -1233,12 +1337,14 @@ static void cpu_bus_scan(device_t dev) 
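/*
 * For reference, a condensed sketch of how this patch reads the per-node core
 * count: Family 15h exposes the count minus one in D18F5x84[7:0] (hence the
 * "+ 1" in quadcore_id.c above), while Family 10h packs CmpCap into D18F3xE8
 * bits 15 and 13:12 (bit 15 only being valid from revision D on). The patch
 * itself mostly works with the minus-one value ("cores", "cores_found",
 * "siblings"); the illustrative helper below returns the full count and assumes
 * the F3/F5 PCI devices have already been located:
 *
 *	static unsigned int node_core_count(device_t cpu_fn3, device_t cpu_fn5,
 *					    int fam15h)
 *	{
 *		if (fam15h)
 *			return (pci_read_config32(cpu_fn5, 0x84) & 0xff) + 1;
 *		uint32_t f3xe8 = pci_read_config32(cpu_fn3, 0xe8);
 *		return (((f3xe8 >> 13) & 0x4) | ((f3xe8 >> 12) & 0x3)) + 1;
 *	}
 */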
+ #if CONFIG_CBB + device_t pci_domain; + #endif ++ int nvram = 0; + int i,j; + int nodes; + unsigned nb_cfg_54; + unsigned siblings; + int cores_found; + int disable_siblings; ++ uint8_t disable_cu_siblings = 0; + unsigned ApicIdCoreIdSize; + + nb_cfg_54 = 0; +@@ -1325,14 +1431,23 @@ static void cpu_bus_scan(device_t dev) + /* Always use the devicetree node with lapic_id 0 for BSP. */ + remap_bsp_lapic(cpu_bus); + ++ if (get_option(&nvram, "compute_unit_siblings") == CB_SUCCESS) ++ disable_cu_siblings = !!nvram; ++ ++ if (disable_cu_siblings) ++ printk(BIOS_DEBUG, "Disabling siblings on each compute unit as requested\n"); ++ + for(i = 0; i < nodes; i++) { + device_t cdb_dev; + unsigned busn, devn; + struct bus *pbus; + ++ uint8_t fam15h = 0; + uint8_t rev_gte_d = 0; + uint8_t dual_node = 0; + uint32_t f3xe8; ++ uint32_t family; ++ uint32_t model; + + busn = CONFIG_CBB; + devn = CONFIG_CDB+i; +@@ -1372,7 +1487,16 @@ static void cpu_bus_scan(device_t dev) + + f3xe8 = pci_read_config32(get_node_pci(0, 3), 0xe8); + +- if (cpuid_eax(0x80000001) >= 0x8) ++ family = model = cpuid_eax(0x80000001); ++ model = ((model & 0xf0000) >> 12) | ((model & 0xf0) >> 4); ++ ++ if (is_fam15h()) { ++ /* Family 15h or later */ ++ fam15h = 1; ++ nb_cfg_54 = 1; ++ } ++ ++ if ((model >= 0x8) || fam15h) + /* Revision D or later */ + rev_gte_d = 1; + +@@ -1382,13 +1506,20 @@ static void cpu_bus_scan(device_t dev) + dual_node = 1; + + cores_found = 0; // one core +- cdb_dev = dev_find_slot(busn, PCI_DEVFN(devn, 3)); ++ if (fam15h) ++ cdb_dev = dev_find_slot(busn, PCI_DEVFN(devn, 5)); ++ else ++ cdb_dev = dev_find_slot(busn, PCI_DEVFN(devn, 3)); + int enable_node = cdb_dev && cdb_dev->enabled; + if (enable_node) { +- j = pci_read_config32(cdb_dev, 0xe8); +- cores_found = (j >> 12) & 3; // dev is func 3 +- if (siblings > 3) +- cores_found |= (j >> 13) & 4; ++ if (fam15h) { ++ cores_found = pci_read_config32(cdb_dev, 0x84) & 0xff; ++ } else { ++ j = pci_read_config32(cdb_dev, 0xe8); ++ cores_found = (j >> 12) & 3; // dev is func 3 ++ if (siblings > 3) ++ cores_found |= (j >> 13) & 4; ++ } + printk(BIOS_DEBUG, " %s siblings=%d\n", dev_path(cdb_dev), cores_found); + } + +@@ -1408,15 +1539,24 @@ static void cpu_bus_scan(device_t dev) + + if (dual_node) { + apic_id = 0; +- if (nb_cfg_54) { +- apic_id |= ((i >> 1) & 0x3) << 4; /* Node ID */ ++ if (fam15h) { ++ apic_id |= ((i >> 1) & 0x3) << 5; /* Node ID */ + apic_id |= ((i & 0x1) * (siblings + 1)) + j; /* Core ID */ + } else { +- apic_id |= i & 0x3; /* Node ID */ +- apic_id |= (((i & 0x1) * (siblings + 1)) + j) << 4; /* Core ID */ ++ if (nb_cfg_54) { ++ apic_id |= ((i >> 1) & 0x3) << 4; /* Node ID */ ++ apic_id |= ((i & 0x1) * (siblings + 1)) + j; /* Core ID */ ++ } else { ++ apic_id |= i & 0x3; /* Node ID */ ++ apic_id |= (((i & 0x1) * (siblings + 1)) + j) << 4; /* Core ID */ ++ } + } + } else { +- apic_id = i * (nb_cfg_54?(siblings+1):1) + j * (nb_cfg_54?1:64); // ? ++ if (fam15h) { ++ apic_id = (i * (siblings + 1)) + j; ++ } else { ++ apic_id = i * (nb_cfg_54?(siblings+1):1) + j * (nb_cfg_54?1:64); // ? 
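/*
 * Worked example for the Family 15h dual-node APIC ID packing just above
 * (nb_cfg_54 is forced to 1 for Family 15h earlier in this function): with
 * i = 3 (second internal node of the second package), siblings = 7 and j = 5,
 * apic_id = (((3 >> 1) & 0x3) << 5) | (((3 & 0x1) * (7 + 1)) + 5) = 0x20 | 0xd
 * = 0x2d. get_node_core_id() in quadcore_id.c reverses this as
 * coreid = 0x2d & 0x1f = 13 and nodeid = (0x2d & 0x60) >> 5 = 1; assuming an
 * eight-core die (core_count = (f5x84 & 0xff) + 1 = 8), its dual-node fixup
 * then yields nodeid = 3, coreid = 5, i.e. the original (i, j) pair.
 */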
++ } + } + + #if CONFIG_ENABLE_APIC_EXT_ID && (CONFIG_APIC_ID_OFFSET>0) +@@ -1426,6 +1566,9 @@ static void cpu_bus_scan(device_t dev) + } + } + #endif ++ if (disable_cu_siblings && (j & 0x1)) ++ continue; ++ + device_t cpu = add_cpu_device(cpu_bus, apic_id, enable_node); + if (cpu) + amd_cpu_topology(cpu, i, j); +@@ -1484,6 +1627,6 @@ static void root_complex_enable_dev(struct device *dev) + } + + struct chip_operations northbridge_amd_amdfam10_root_complex_ops = { +- CHIP_NAME("AMD FAM10 Root Complex") ++ CHIP_NAME("AMD Family 10h/15h Root Complex") + .enable_dev = root_complex_enable_dev, + }; +diff --git a/src/northbridge/amd/amdfam10/raminit_amdmct.c b/src/northbridge/amd/amdfam10/raminit_amdmct.c +index 5068e7a..cae228f 100644 +--- a/src/northbridge/amd/amdfam10/raminit_amdmct.c ++++ b/src/northbridge/amd/amdfam10/raminit_amdmct.c +@@ -44,8 +44,120 @@ static void print_tf(const char *func, const char *strval) + #endif + } + +-static uint16_t mct_MaxLoadFreq(uint8_t count, uint8_t registered, uint16_t freq) ++static inline void fam15h_switch_dct(uint32_t dev, uint8_t dct) + { ++ uint32_t dword; ++ ++ dword = Get_NB32(dev, 0x10c); ++ dword &= ~0x1; ++ dword |= (dct & 0x1); ++ Set_NB32(dev, 0x10c, dword); ++} ++ ++static inline void fam15h_switch_nb_pstate_config_reg(uint32_t dev, uint8_t nb_pstate) ++{ ++ uint32_t dword; ++ ++ dword = Get_NB32(dev, 0x10c); ++ dword &= ~(0x3 << 4); ++ dword |= (nb_pstate & 0x3) << 4; ++ Set_NB32(dev, 0x10c, dword); ++} ++ ++static inline uint32_t Get_NB32_DCT(uint32_t dev, uint8_t dct, uint32_t reg) ++{ ++ if (is_fam15h()) { ++ /* Obtain address of function 0x1 */ ++ uint32_t dev_map = (dev & (~(0x7 << 12))) | (0x1 << 12); ++ fam15h_switch_dct(dev_map, dct); ++ return Get_NB32(dev, reg); ++ } else { ++ return Get_NB32(dev, (0x100 * dct) + reg); ++ } ++} ++ ++static inline void Set_NB32_DCT(uint32_t dev, uint8_t dct, uint32_t reg, uint32_t val) ++{ ++ if (is_fam15h()) { ++ /* Obtain address of function 0x1 */ ++ uint32_t dev_map = (dev & (~(0x7 << 12))) | (0x1 << 12); ++ fam15h_switch_dct(dev_map, dct); ++ Set_NB32(dev, reg, val); ++ } else { ++ Set_NB32(dev, (0x100 * dct) + reg, val); ++ } ++} ++ ++static inline uint32_t Get_NB32_DCT_NBPstate(uint32_t dev, uint8_t dct, uint8_t nb_pstate, uint32_t reg) ++{ ++ if (is_fam15h()) { ++ /* Obtain address of function 0x1 */ ++ uint32_t dev_map = (dev & (~(0x7 << 12))) | (0x1 << 12); ++ fam15h_switch_dct(dev_map, dct); ++ fam15h_switch_nb_pstate_config_reg(dev_map, nb_pstate); ++ return Get_NB32(dev, reg); ++ } else { ++ return Get_NB32(dev, (0x100 * dct) + reg); ++ } ++} ++ ++static inline void Set_NB32_DCT_NBPstate(uint32_t dev, uint8_t dct, uint8_t nb_pstate, uint32_t reg, uint32_t val) ++{ ++ if (is_fam15h()) { ++ /* Obtain address of function 0x1 */ ++ uint32_t dev_map = (dev & (~(0x7 << 12))) | (0x1 << 12); ++ fam15h_switch_dct(dev_map, dct); ++ fam15h_switch_nb_pstate_config_reg(dev_map, nb_pstate); ++ Set_NB32(dev, reg, val); ++ } else { ++ Set_NB32(dev, (0x100 * dct) + reg, val); ++ } ++} ++ ++static inline uint32_t Get_NB32_index_wait_DCT(uint32_t dev, uint8_t dct, uint32_t index_reg, uint32_t index) ++{ ++ if (is_fam15h()) { ++ /* Obtain address of function 0x1 */ ++ uint32_t dev_map = (dev & (~(0x7 << 12))) | (0x1 << 12); ++ fam15h_switch_dct(dev_map, dct); ++ return Get_NB32_index_wait(dev, index_reg, index); ++ } else { ++ return Get_NB32_index_wait(dev, (0x100 * dct) + index_reg, index); ++ } ++} ++ ++static inline void Set_NB32_index_wait_DCT(uint32_t dev, uint8_t dct, uint32_t index_reg, uint32_t 
index, uint32_t data) ++{ ++ if (is_fam15h()) { ++ /* Obtain address of function 0x1 */ ++ uint32_t dev_map = (dev & (~(0x7 << 12))) | (0x1 << 12); ++ fam15h_switch_dct(dev_map, dct); ++ Set_NB32_index_wait(dev, index_reg, index, data); ++ } else { ++ Set_NB32_index_wait(dev, (0x100 * dct) + index_reg, index, data); ++ } ++} ++ ++static uint16_t voltage_index_to_mv(uint8_t index) ++{ ++ if (index & 0x8) ++ return 1150; ++ if (index & 0x4) ++ return 1250; ++ else if (index & 0x2) ++ return 1350; ++ else ++ return 1500; ++} ++ ++static uint16_t mct_MaxLoadFreq(uint8_t count, uint8_t highest_rank_count, uint8_t registered, uint8_t voltage, uint16_t freq) ++{ ++ /* FIXME ++ * Mainboards need to be able to specify the maximum number of DIMMs installable per channel ++ * For now assume a maximum of 2 DIMMs per channel can be installed ++ */ ++ uint8_t MaxDimmsInstallable = 2; ++ + /* Return limited maximum RAM frequency */ + if (IS_ENABLED(CONFIG_DIMM_DDR2)) { + if (IS_ENABLED(CONFIG_DIMM_REGISTERED) && registered) { +@@ -68,34 +180,178 @@ static uint16_t mct_MaxLoadFreq(uint8_t count, uint8_t registered, uint16_t freq + } + } + } else if (IS_ENABLED(CONFIG_DIMM_DDR3)) { +- if (IS_ENABLED(CONFIG_DIMM_REGISTERED) && registered) { +- /* K10 BKDG Rev. 3.62 Table 34 */ +- if (count > 2) { +- /* Limit to DDR3-800 */ +- if (freq > 400) { +- freq = 400; +- print_tf(__func__, ": More than 2 registered DIMMs on channel; limiting to DDR3-800\n"); ++ if (voltage == 0) { ++ printk(BIOS_DEBUG, "%s: WARNING: Mainboard DDR3 voltage unknown, assuming 1.5V!\n", __func__); ++ voltage = 0x1; ++ } ++ ++ if (is_fam15h()) { ++ if (IS_ENABLED(CONFIG_DIMM_REGISTERED) && registered) { ++ /* Fam15h BKDG Rev. 3.14 Table 27 */ ++ if (voltage & 0x4) { ++ /* 1.25V */ ++ if (count > 1) { ++ if (highest_rank_count > 1) { ++ /* Limit to DDR3-1066 */ ++ if (freq > 533) { ++ freq = 533; ++ printk(BIOS_DEBUG, "%s: More than 1 registered DIMM on %dmV channel; limiting to DDR3-1066\n", __func__, voltage_index_to_mv(voltage)); ++ } ++ } else { ++ /* Limit to DDR3-1333 */ ++ if (freq > 666) { ++ freq = 666; ++ printk(BIOS_DEBUG, "%s: More than 1 registered DIMM on %dmV channel; limiting to DDR3-1333\n", __func__, voltage_index_to_mv(voltage)); ++ } ++ } ++ } else { ++ /* Limit to DDR3-1333 */ ++ if (freq > 666) { ++ freq = 666; ++ printk(BIOS_DEBUG, "%s: 1 registered DIMM on %dmV channel; limiting to DDR3-1333\n", __func__, voltage_index_to_mv(voltage)); ++ } ++ } ++ } else if (voltage & 0x2) { ++ /* 1.35V */ ++ if (count > 1) { ++ /* Limit to DDR3-1333 */ ++ if (freq > 666) { ++ freq = 666; ++ printk(BIOS_DEBUG, "%s: More than 1 registered DIMM on %dmV channel; limiting to DDR3-1333\n", __func__, voltage_index_to_mv(voltage)); ++ } ++ } else { ++ /* Limit to DDR3-1600 */ ++ if (freq > 800) { ++ freq = 800; ++ printk(BIOS_DEBUG, "%s: 1 registered DIMM on %dmV channel; limiting to DDR3-1600\n", __func__, voltage_index_to_mv(voltage)); ++ } ++ } ++ } else if (voltage & 0x1) { ++ /* 1.50V */ ++ if (count > 1) { ++ /* Limit to DDR3-1600 */ ++ if (freq > 800) { ++ freq = 800; ++ printk(BIOS_DEBUG, "%s: More than 1 registered DIMM on %dmV channel; limiting to DDR3-1600\n", __func__, voltage_index_to_mv(voltage)); ++ } ++ } else { ++ /* Limit to DDR3-1866 */ ++ if (freq > 933) { ++ freq = 933; ++ printk(BIOS_DEBUG, "%s: 1 registered DIMM on %dmV channel; limiting to DDR3-1866\n", __func__, voltage_index_to_mv(voltage)); ++ } ++ } ++ } ++ } else { ++ /* Fam15h BKDG Rev. 
3.14 Table 26 */ ++ if (voltage & 0x4) { ++ /* 1.25V */ ++ if (count > 1) { ++ if (highest_rank_count > 1) { ++ /* Limit to DDR3-1066 */ ++ if (freq > 533) { ++ freq = 533; ++ printk(BIOS_DEBUG, "%s: More than 1 unbuffered DIMM on %dmV channel; limiting to DDR3-1066\n", __func__, voltage_index_to_mv(voltage)); ++ } ++ } else { ++ /* Limit to DDR3-1333 */ ++ if (freq > 666) { ++ freq = 666; ++ printk(BIOS_DEBUG, "%s: More than 1 unbuffered DIMM on %dmV channel; limiting to DDR3-1333\n", __func__, voltage_index_to_mv(voltage)); ++ } ++ } ++ } else { ++ /* Limit to DDR3-1333 */ ++ if (freq > 666) { ++ freq = 666; ++ printk(BIOS_DEBUG, "%s: 1 unbuffered DIMM on %dmV channel; limiting to DDR3-1333\n", __func__, voltage_index_to_mv(voltage)); ++ } ++ } ++ } else if (voltage & 0x2) { ++ /* 1.35V */ ++ if (MaxDimmsInstallable > 1) { ++ /* Limit to DDR3-1333 */ ++ if (freq > 666) { ++ freq = 666; ++ printk(BIOS_DEBUG, "%s: More than 1 unbuffered DIMM on %dmV channel; limiting to DDR3-1333\n", __func__, voltage_index_to_mv(voltage)); ++ } ++ } else { ++ /* Limit to DDR3-1600 */ ++ if (freq > 800) { ++ freq = 800; ++ printk(BIOS_DEBUG, "%s: 1 unbuffered DIMM on %dmV channel; limiting to DDR3-1600\n", __func__, voltage_index_to_mv(voltage)); ++ } ++ } ++ } else if (voltage & 0x1) { ++ if (MaxDimmsInstallable == 1) { ++ if (count > 1) { ++ /* Limit to DDR3-1600 */ ++ if (freq > 800) { ++ freq = 800; ++ printk(BIOS_DEBUG, "%s: More than 1 unbuffered DIMM on %dmV channel; limiting to DDR3-1600\n", __func__, voltage_index_to_mv(voltage)); ++ } ++ } else { ++ /* Limit to DDR3-1866 */ ++ if (freq > 933) { ++ freq = 933; ++ printk(BIOS_DEBUG, "%s: 1 unbuffered DIMM on %dmV channel; limiting to DDR3-1866\n", __func__, voltage_index_to_mv(voltage)); ++ } ++ } ++ } else { ++ if (count > 1) { ++ if (highest_rank_count > 1) { ++ /* Limit to DDR3-1333 */ ++ if (freq > 666) { ++ freq = 666; ++ printk(BIOS_DEBUG, "%s: More than 1 unbuffered DIMM on %dmV channel; limiting to DDR3-1333\n", __func__, voltage_index_to_mv(voltage)); ++ } ++ } else { ++ /* Limit to DDR3-1600 */ ++ if (freq > 800) { ++ freq = 800; ++ printk(BIOS_DEBUG, "%s: More than 1 unbuffered DIMM on %dmV channel; limiting to DDR3-1600\n", __func__, voltage_index_to_mv(voltage)); ++ } ++ } ++ } else { ++ /* Limit to DDR3-1600 */ ++ if (freq > 800) { ++ freq = 800; ++ printk(BIOS_DEBUG, "%s: 1 unbuffered DIMM on %dmV channel; limiting to DDR3-1600\n", __func__, voltage_index_to_mv(voltage)); ++ } ++ } ++ } + } +- } else if (count == 2) { +- /* Limit to DDR3-1066 */ +- if (freq > 533) { +- freq = 533; +- print_tf(__func__, ": 2 registered DIMMs on channel; limiting to DDR3-1066\n"); ++ } ++ } else { ++ if (IS_ENABLED(CONFIG_DIMM_REGISTERED) && registered) { ++ /* K10 BKDG Rev. 3.62 Table 34 */ ++ if (count > 2) { ++ /* Limit to DDR3-800 */ ++ if (freq > 400) { ++ freq = 400; ++ printk(BIOS_DEBUG, "%s: More than 2 registered DIMMs on %dmV channel; limiting to DDR3-800\n", __func__, voltage_index_to_mv(voltage)); ++ } ++ } else if (count == 2) { ++ /* Limit to DDR3-1066 */ ++ if (freq > 533) { ++ freq = 533; ++ printk(BIOS_DEBUG, "%s: 2 registered DIMMs on %dmV channel; limiting to DDR3-1066\n", __func__, voltage_index_to_mv(voltage)); ++ } ++ } else { ++ /* Limit to DDR3-1333 */ ++ if (freq > 666) { ++ freq = 666; ++ printk(BIOS_DEBUG, "%s: 1 registered DIMM on %dmV channel; limiting to DDR3-1333\n", __func__, voltage_index_to_mv(voltage)); ++ } + } + } else { ++ /* K10 BKDG Rev. 
3.62 Table 33 */ + /* Limit to DDR3-1333 */ + if (freq > 666) { + freq = 666; +- print_tf(__func__, ": 1 registered DIMM on channel; limiting to DDR3-1333\n"); ++ printk(BIOS_DEBUG, "%s: unbuffered DIMMs on %dmV channel; limiting to DDR3-1333\n", __func__, voltage_index_to_mv(voltage)); + } + } +- } else { +- /* K10 BKDG Rev. 3.62 Table 33 */ +- /* Limit to DDR3-1333 */ +- if (freq > 666) { +- freq = 666; +- print_tf(__func__, ": unbuffered DIMMs on channel; limiting to DDR3-1333\n"); +- } + } + } + +@@ -225,11 +481,13 @@ void mctGet_DIMMAddr(struct DCTStatStruc *pDCTstat, u32 node) + + } + ++#if IS_ENABLED(CONFIG_SET_FIDVID) + static u8 mctGetProcessorPackageType(void) { + /* FIXME: I guess this belongs wherever mctGetLogicalCPUID ends up ? */ +- u32 BrandId = cpuid_ebx(0x80000001); +- return (u8)((BrandId >> 28) & 0x0F); ++ u32 BrandId = cpuid_ebx(0x80000001); ++ return (u8)((BrandId >> 28) & 0x0F); + } ++#endif + + static void raminit_amdmct(struct sys_info *sysinfo) + { +diff --git a/src/northbridge/amd/amdht/h3ncmn.c b/src/northbridge/amd/amdht/h3ncmn.c +index 97f9db8..8f9177f 100644 +--- a/src/northbridge/amd/amdht/h3ncmn.c ++++ b/src/northbridge/amd/amdht/h3ncmn.c +@@ -43,6 +43,7 @@ + #define CPU_HTNB_FUNC_04 4 + #define CPU_ADDR_FUNC_01 1 + #define CPU_NB_FUNC_03 3 ++#define CPU_NB_FUNC_05 5 + + /* Function 0 registers */ + #define REG_ROUTE0_0X40 0x40 +@@ -70,6 +71,7 @@ + #define REG_NB_CPUID_3XFC 0xFC + #define REG_NB_LINK_XCS_TOKEN0_3X148 0x148 + #define REG_NB_DOWNCORE_3X190 0x190 ++#define REG_NB_CAPABILITY_5X84 0x84 + + /* Function 4 registers */ + +@@ -555,9 +557,10 @@ static u8 fam10GetNumCoresOnNode(u8 node, cNorthBridge *nb) + 15, 12, &temp); + + /* bits[15,13,12] specify the cores */ +- /* Support Downcoring */ + temp = ((temp & 8) >> 1) + (temp & 3); + cores = temp + 1; ++ ++ /* Support Downcoring */ + AmdPCIReadBits (MAKE_SBDFO(makePCISegmentFromNode(node), + makePCIBusFromNode(node), + makePCIDeviceFromNode(node), +@@ -576,6 +579,56 @@ static u8 fam10GetNumCoresOnNode(u8 node, cNorthBridge *nb) + + /***************************************************************************//** + * ++ * static u8 ++ * fam15GetNumCoresOnNode(u8 node, cNorthBridge *nb) ++ * ++ * Description: ++ * Return the number of cores (1 based count) on node. 
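As an illustrative sketch (not taken from the patch itself), both the Family 10h routine above and the new Family 15h routine below apply the same downcoring correction: every bit set in the F3x190 downcore register removes one core from the CmpCap-derived count. A minimal version of that adjustment, with hypothetical names:

static u8 apply_downcoring(u8 populated_cores, u32 downcore_mask)
{
	/* One core is subtracted for each DownCore bit set among the populated cores */
	u8 i, disabled = 0;
	for (i = 0; i < populated_cores; i++) {
		if (downcore_mask & ((u32)1 << i))
			disabled++;
	}
	return populated_cores - disabled;
}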
++ * ++ * Parameters: ++ * @param[in] node = the node that will be examined ++ * @param[in] *nb = this northbridge ++ * @return = the number of cores ++ * ++ * ++ */ ++static u8 fam15GetNumCoresOnNode(u8 node, cNorthBridge *nb) ++{ ++ u32 temp, leveling, cores; ++ u8 i; ++ ++ ASSERT((node < nb->maxNodes)); ++ /* Read CmpCap [7:0] */ ++ AmdPCIReadBits(MAKE_SBDFO(makePCISegmentFromNode(node), ++ makePCIBusFromNode(node), ++ makePCIDeviceFromNode(node), ++ CPU_NB_FUNC_05, ++ REG_NB_CAPABILITY_5X84), ++ 7, 0, &temp); ++ ++ /* bits[7:0] specify the cores */ ++ temp = temp & 0xff; ++ cores = temp + 1; ++ ++ /* Support Downcoring */ ++ AmdPCIReadBits (MAKE_SBDFO(makePCISegmentFromNode(node), ++ makePCIBusFromNode(node), ++ makePCIDeviceFromNode(node), ++ CPU_NB_FUNC_03, ++ REG_NB_DOWNCORE_3X190), ++ 31, 0, &leveling); ++ for (i=0; i<cores; i++) ++ { ++ if (leveling & ((u32) 1 << i)) ++ { ++ temp--; ++ } ++ } ++ return (u8)(temp+1); ++} ++ ++/***************************************************************************//** ++ * + * static void + * setTotalNodesAndCores(u8 node, u8 totalNodes, u8 totalCores, cNorthBridge *nb) + * +@@ -854,6 +907,69 @@ static BOOL fam10IsCapable(u8 node, sMainData *pDat, cNorthBridge *nb) + + /***************************************************************************//** + * ++ * static BOOL ++ * fam15IsCapable(u8 node, sMainData *pDat, cNorthBridge *nb) ++ * ++ * Description: ++ * Get node capability and update the minimum supported system capability. ++ * Return whether the current configuration exceeds the capability. ++ * ++ * Parameters: ++ * @param[in] node = the node ++ * @param[in,out] *pDat = sysMpCap (updated) and NodesDiscovered ++ * @param[in] *nb = this northbridge ++ * @return true: system is capable of current config. ++ * false: system is not capable of current config. 
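As a quick worked example (editorial only; the register values are hypothetical), a Family 15h node reporting CmpCap = 0x07 in F5x84 with two DownCore bits set in F3x190 would be handled by fam15GetNumCoresOnNode() above as:

	u8 cores = (0x07 & 0xff) + 1;		/* CmpCap is a zero-based count: 8 populated cores */
	u8 disabled = 2;			/* two bits set in the downcore mask */
	u8 reported = cores - disabled;		/* the routine returns 6 */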
++ * ++ * --------------------------------------------------------------------------------------- ++ */ ++static BOOL fam15IsCapable(u8 node, sMainData *pDat, cNorthBridge *nb) ++{ ++#ifndef HT_BUILD_NC_ONLY ++ u32 temp; ++ u8 maxNodes; ++ ++ ASSERT(node < nb->maxNodes); ++ ++ AmdPCIReadBits(MAKE_SBDFO(makePCISegmentFromNode(node), ++ makePCIBusFromNode(node), ++ makePCIDeviceFromNode(node), ++ CPU_NB_FUNC_03, ++ REG_NB_CAPABILITY_3XE8), ++ 18, 16, &temp); ++ ++ if (temp != 0) ++ { ++ maxNodes = (1 << (~temp & 0x3)); /* That is, 1, 2, 4, or 8 */ ++ } ++ else ++ { ++ /* Check if CPU package is dual node */ ++ AmdPCIReadBits(MAKE_SBDFO(makePCISegmentFromNode(node), ++ makePCIBusFromNode(node), ++ makePCIDeviceFromNode(node), ++ CPU_NB_FUNC_03, ++ REG_NB_CAPABILITY_3XE8), ++ 29, 29, &temp); ++ if (temp) ++ maxNodes = 4; ++ else ++ maxNodes = 8; ++ } ++ ++ if (pDat->sysMpCap > maxNodes) ++ { ++ pDat->sysMpCap = maxNodes; ++ } ++ /* Note since sysMpCap is one based and NodesDiscovered is zero based, equal is false */ ++ return (pDat->sysMpCap > pDat->NodesDiscovered); ++#else ++ return 1; ++#endif ++} ++ ++/***************************************************************************//** ++ * + * static void + * fam0fStopLink(u8 currentNode, u8 currentLink, cNorthBridge *nb) + * +@@ -2068,6 +2184,49 @@ void newNorthBridge(u8 node, cNorthBridge *nb) + u32 match; + u32 extFam, baseFam, model; + ++ cNorthBridge fam15 = ++ { ++#ifdef HT_BUILD_NC_ONLY ++ 8, ++ 1, ++ 12, ++#else ++ 8, ++ 8, ++ 64, ++#endif /* HT_BUILD_NC_ONLY*/ ++ writeRoutingTable, ++ writeNodeID, ++ readDefLnk, ++ enableRoutingTables, ++ verifyLinkIsCoherent, ++ readTrueLinkFailStatus, ++ readToken, ++ writeToken, ++ fam15GetNumCoresOnNode, ++ setTotalNodesAndCores, ++ limitNodes, ++ writeFullRoutingTable, ++ isCompatible, ++ fam15IsCapable, ++ (void (*)(u8, u8, cNorthBridge*))commonVoid, ++ (BOOL (*)(u8, u8, sMainData*, cNorthBridge*))commonReturnFalse, ++ readSbLink, ++ verifyLinkIsNonCoherent, ++ ht3SetCFGAddrMap, ++ convertBitsToWidth, ++ convertWidthToBits, ++ fam10NorthBridgeFreqMask, ++ gatherLinkData, ++ setLinkData, ++ ht3WriteTrafficDistribution, ++ fam10BufferOptimizations, ++ 0x00000001, ++ 0x00000200, ++ 18, ++ 0x00000f06 ++ }; ++ + cNorthBridge fam10 = + { + #ifdef HT_BUILD_NC_ONLY +@@ -2175,8 +2334,14 @@ void newNorthBridge(u8 node, cNorthBridge *nb) + 7, 4, &model); + match = (u32)((baseFam << 8) | extFam); + +- /* Test each in turn looking for a match. Init the struct if found */ +- if (match == fam10.compatibleKey) ++ /* Test each in turn looking for a match. ++ * Initialize the struct if found. 
++ */ ++ if (match == fam15.compatibleKey) ++ { ++ Amdmemcpy((void *)nb, (const void *)&fam15, (u32) sizeof(cNorthBridge)); ++ } ++ else if (match == fam10.compatibleKey) + { + Amdmemcpy((void *)nb, (const void *)&fam10, (u32) sizeof(cNorthBridge)); + } +diff --git a/src/northbridge/amd/amdht/ht_wrapper.c b/src/northbridge/amd/amdht/ht_wrapper.c +index 389b1b1..c0ccc69 100644 +--- a/src/northbridge/amd/amdht/ht_wrapper.c ++++ b/src/northbridge/amd/amdht/ht_wrapper.c +@@ -174,16 +174,22 @@ void amd_ht_fixup(struct sys_info *sysinfo) { + printk(BIOS_DEBUG, "amd_ht_fixup()\n"); + if (IS_ENABLED(CONFIG_CPU_AMD_MODEL_10XXX)) { + uint8_t rev_gte_d = 0; ++ uint8_t fam15h = 0; + uint8_t dual_node = 0; + uint32_t f3xe8; + uint32_t family; + uint32_t model; + + family = model = cpuid_eax(0x80000001); +- model = ((model & 0xf0000) >> 16) | ((model & 0xf0) >> 4); ++ model = ((model & 0xf0000) >> 12) | ((model & 0xf0) >> 4); ++ family = ((family & 0xf00000) >> 16) | ((family & 0xf00) >> 8); + +- if (model >= 0x8) +- /* Revision D or later */ ++ if (family >= 0x6f) ++ /* Family 15h or later */ ++ fam15h = 1; ++ ++ if ((model >= 0x8) || fam15h) ++ /* Family 10h Revision D or later */ + rev_gte_d = 1; + + if (rev_gte_d) { +@@ -195,7 +201,8 @@ void amd_ht_fixup(struct sys_info *sysinfo) { + + if (dual_node) { + /* Each G34 processor contains a defective HT link. +- * See the BKDG Rev 3.62 section 2.7.1.5 for details. ++ * See the Family 10h BKDG Rev 3.62 section 2.7.1.5 for details ++ * For Family 15h see the BKDG Rev. 3.14 section 2.12.1.5 for details. + */ + uint8_t node; + uint8_t node_count = get_nodes(); +@@ -205,46 +212,46 @@ void amd_ht_fixup(struct sys_info *sysinfo) { + uint8_t internal_node_number = ((f3xe8 & 0xc0000000) >> 30); + printk(BIOS_DEBUG, "amd_ht_fixup(): node %d (internal node ID %d): disabling defective HT link\n", node, internal_node_number); + if (internal_node_number == 0) { +- uint8_t package_link_3_connected = pci_read_config32(NODE_PCI(node, 0), 0xd8) & 0x1; ++ uint8_t package_link_3_connected = pci_read_config32(NODE_PCI(node, 0), (fam15h)?0x98:0xd8) & 0x1; + if (package_link_3_connected) { + /* Set WidthIn and WidthOut to 0 */ +- dword = pci_read_config32(NODE_PCI(node, 0), 0xc4); ++ dword = pci_read_config32(NODE_PCI(node, 0), (fam15h)?0x84:0xc4); + dword &= ~0x77000000; +- pci_write_config32(NODE_PCI(node, 0), 0xc4, dword); ++ pci_write_config32(NODE_PCI(node, 0), (fam15h)?0x84:0xc4, dword); + /* Set Ganged to 1 */ +- dword = pci_read_config32(NODE_PCI(node, 0), 0x178); ++ dword = pci_read_config32(NODE_PCI(node, 0), (fam15h)?0x170:0x178); + dword |= 0x00000001; +- pci_write_config32(NODE_PCI(node, 0), 0x178, dword); ++ pci_write_config32(NODE_PCI(node, 0), (fam15h)?0x170:0x178, dword); + } else { + /* Set ConnDly to 1 */ + dword = pci_read_config32(NODE_PCI(node, 0), 0x16c); + dword |= 0x00000100; + pci_write_config32(NODE_PCI(node, 0), 0x16c, dword); + /* Set TransOff and EndOfChain to 1 */ +- dword = pci_read_config32(NODE_PCI(node, 4), 0xc4); ++ dword = pci_read_config32(NODE_PCI(node, 4), (fam15h)?0x84:0xc4); + dword |= 0x000000c0; +- pci_write_config32(NODE_PCI(node, 4), 0xc4, dword); ++ pci_write_config32(NODE_PCI(node, 4), (fam15h)?0x84:0xc4, dword); + } + } else if (internal_node_number == 1) { +- uint8_t package_link_3_connected = pci_read_config32(NODE_PCI(node, 0), 0xb8) & 0x1; ++ uint8_t package_link_3_connected = pci_read_config32(NODE_PCI(node, 0), (fam15h)?0xf8:0xb8) & 0x1; + if (package_link_3_connected) { + /* Set WidthIn and WidthOut to 0 */ +- dword = 
pci_read_config32(NODE_PCI(node, 0), 0xa4); ++ dword = pci_read_config32(NODE_PCI(node, 0), (fam15h)?0xe4:0xa4); + dword &= ~0x77000000; +- pci_write_config32(NODE_PCI(node, 0), 0xa4, dword); ++ pci_write_config32(NODE_PCI(node, 0), (fam15h)?0xe4:0xa4, dword); + /* Set Ganged to 1 */ +- dword = pci_read_config32(NODE_PCI(node, 0), 0x174); ++ dword = pci_read_config32(NODE_PCI(node, 0), (fam15h)?0x18c:0x174); + dword |= 0x00000001; +- pci_write_config32(NODE_PCI(node, 0), 0x174, dword); ++ pci_write_config32(NODE_PCI(node, 0), (fam15h)?0x18c:0x174, dword); + } else { + /* Set ConnDly to 1 */ + dword = pci_read_config32(NODE_PCI(node, 0), 0x16c); + dword |= 0x00000100; + pci_write_config32(NODE_PCI(node, 0), 0x16c, dword); + /* Set TransOff and EndOfChain to 1 */ +- dword = pci_read_config32(NODE_PCI(node, 4), 0xa4); ++ dword = pci_read_config32(NODE_PCI(node, 4), (fam15h)?0xe4:0xa4); + dword |= 0x000000c0; +- pci_write_config32(NODE_PCI(node, 4), 0xa4, dword); ++ pci_write_config32(NODE_PCI(node, 4), (fam15h)?0xe4:0xa4, dword); + } + } + } +diff --git a/src/northbridge/amd/amdmct/amddefs.h b/src/northbridge/amd/amdmct/amddefs.h +index 117fea5..20a77d3 100644 +--- a/src/northbridge/amd/amdmct/amddefs.h ++++ b/src/northbridge/amd/amdmct/amddefs.h +@@ -20,33 +20,35 @@ + /* FIXME: this file should be moved to include/cpu/amd/amddefs.h */ + + /* Public Revisions - USE THESE VERSIONS TO MAKE COMPARE WITH CPULOGICALID RETURN VALUE*/ +-#define AMD_SAFEMODE 0x80000000 /* Unknown future revision - SAFE MODE */ +-#define AMD_NPT_F0 0x00000001 /* F0 stepping */ +-#define AMD_NPT_F1 0x00000002 /* F1 stepping */ +-#define AMD_NPT_F2C 0x00000004 +-#define AMD_NPT_F2D 0x00000008 +-#define AMD_NPT_F2E 0x00000010 /* F2 stepping E */ +-#define AMD_NPT_F2G 0x00000020 /* F2 stepping G */ +-#define AMD_NPT_F2J 0x00000040 +-#define AMD_NPT_F2K 0x00000080 +-#define AMD_NPT_F3L 0x00000100 /* F3 Stepping */ +-#define AMD_NPT_G0A 0x00000200 /* G0 stepping */ +-#define AMD_NPT_G1B 0x00000400 /* G1 stepping */ +-#define AMD_DR_A0A 0x00010000 /* Barcelona A0 */ +-#define AMD_DR_A1B 0x00020000 /* Barcelona A1 */ +-#define AMD_DR_A2 0x00040000 /* Barcelona A2 */ +-#define AMD_DR_B0 0x00080000 /* Barcelona B0 */ +-#define AMD_DR_B1 0x00100000 /* Barcelona B1 */ +-#define AMD_DR_B2 0x00200000 /* Barcelona B2 */ +-#define AMD_DR_BA 0x00400000 /* Barcelona BA */ +-#define AMD_DR_B3 0x00800000 /* Barcelona B3 */ +-#define AMD_RB_C2 0x01000000 /* Shanghai C2 */ +-#define AMD_DA_C2 0x02000000 /* XXXX C2 */ +-#define AMD_HY_D0 0x04000000 /* Istanbul D0 */ +-#define AMD_RB_C3 0x08000000 /* ??? 
C3 */ +-#define AMD_DA_C3 0x10000000 /* XXXX C3 */ +-#define AMD_HY_D1 0x20000000 /* Istanbul D1 */ +-#define AMD_PH_E0 0x40000000 /* Phenom II X4 X6 */ ++#define AMD_SAFEMODE 0x8000000000000000 /* Unknown future revision - SAFE MODE */ ++#define AMD_NPT_F0 0x0000000000000001 /* F0 stepping */ ++#define AMD_NPT_F1 0x0000000000000002 /* F1 stepping */ ++#define AMD_NPT_F2C 0x0000000000000004 ++#define AMD_NPT_F2D 0x0000000000000008 ++#define AMD_NPT_F2E 0x0000000000000010 /* F2 stepping E */ ++#define AMD_NPT_F2G 0x0000000000000020 /* F2 stepping G */ ++#define AMD_NPT_F2J 0x0000000000000040 ++#define AMD_NPT_F2K 0x0000000000000080 ++#define AMD_NPT_F3L 0x0000000000000100 /* F3 Stepping */ ++#define AMD_NPT_G0A 0x0000000000000200 /* G0 stepping */ ++#define AMD_NPT_G1B 0x0000000000000400 /* G1 stepping */ ++#define AMD_DR_A0A 0x0000000000010000 /* Barcelona A0 */ ++#define AMD_DR_A1B 0x0000000000020000 /* Barcelona A1 */ ++#define AMD_DR_A2 0x0000000000040000 /* Barcelona A2 */ ++#define AMD_DR_B0 0x0000000000080000 /* Barcelona B0 */ ++#define AMD_DR_B1 0x0000000000100000 /* Barcelona B1 */ ++#define AMD_DR_B2 0x0000000000200000 /* Barcelona B2 */ ++#define AMD_DR_BA 0x0000000000400000 /* Barcelona BA */ ++#define AMD_DR_B3 0x0000000000800000 /* Barcelona B3 */ ++#define AMD_RB_C2 0x0000000001000000 /* Shanghai C2 */ ++#define AMD_DA_C2 0x0000000002000000 /* XXXX C2 */ ++#define AMD_HY_D0 0x0000000004000000 /* Istanbul D0 */ ++#define AMD_RB_C3 0x0000000008000000 /* ??? C3 */ ++#define AMD_DA_C3 0x0000000010000000 /* XXXX C3 */ ++#define AMD_HY_D1 0x0000000020000000 /* Istanbul D1 */ ++#define AMD_PH_E0 0x0000000040000000 /* Phenom II X4 X6 */ ++#define AMD_OR_B2 0x0000000080000000 /* Interlagos */ ++#define AMD_OR_C0 0x0000000100000000 /* Abu Dhabi */ + + /* + * Groups - Create as many as you wish, from the above public values +@@ -76,6 +78,7 @@ + #define AMD_DRBH_Cx (AMD_DR_Cx | AMD_HY_D0 ) + #define AMD_DRBA23_RBC2 (AMD_DR_BA | AMD_DR_B2 | AMD_DR_B3 | AMD_RB_C2 ) + #define AMD_DR_DAC2_OR_C3 (AMD_DA_C2 | AMD_DA_C3 | AMD_RB_C3) ++#define AMD_FAM15_ALL (AMD_OR_B2 | AMD_OR_C0) + + /* + * Public Platforms - USE THESE VERSIONS TO MAKE COMPARE WITH CPUPLATFORMTYPE RETURN VALUE +@@ -122,23 +125,34 @@ + */ + #define CPUID_EXT_PM 0x80000007 + #define CPUID_MODEL 1 +-#define MCG_CAP 0x00000179 ++#define MCG_CAP 0x00000179 + #define MCG_CTL_P 8 +-#define MC0_CTL 0x00000400 +-#define MC0_STA MC0_CTL + 1 +-#define FS_Base 0xC0000100 ++#define MC0_CTL 0x00000400 ++#define MC0_STA (MC0_CTL + 1) ++#define MC4_MISC0 0x00000413 ++#define MC4_MISC1 0xC0000408 ++#define MC4_MISC2 0xC0000409 ++#define FS_Base 0xC0000100 + #define SYSCFG 0xC0010010 + #define HWCR 0xC0010015 + #define NB_CFG 0xC001001F + #define FidVidStatus 0xC0010042 ++#define MC1_CTL_MASK 0xC0010045 + #define MC4_CTL_MASK 0xC0010048 + #define OSVW_ID_Length 0xC0010140 + #define OSVW_Status 0xC0010141 + #define CPUIDFEATURES 0xC0011004 + #define LS_CFG 0xC0011020 ++#define IC_CFG 0xC0011021 + #define DC_CFG 0xC0011022 + #define BU_CFG 0xC0011023 +-#define BU_CFG2 0xC001102A ++#define FP_CFG 0xC0011028 ++#define DE_CFG 0xC0011029 ++#define BU_CFG2 0xC001102A ++#define BU_CFG3 0xC001102B ++#define EX_CFG 0xC001102C ++#define LS_CFG2 0xC001102D ++#define IBS_OP_DATA3 0xC0011037 + + /* + * Processor package types +diff --git a/src/northbridge/amd/amdmct/mct/mct_d.c b/src/northbridge/amd/amdmct/mct/mct_d.c +index 88910e2..be0af65 100644 +--- a/src/northbridge/amd/amdmct/mct/mct_d.c ++++ b/src/northbridge/amd/amdmct/mct/mct_d.c +@@ -2189,6 +2189,7 
@@ static u8 DIMMPresence_D(struct MCTStatStruc *pMCTstat, + pDCTstat->DimmManufacturerID[i] |= ((uint64_t)mctRead_SPD(smbaddr, SPD_MANID_START + k)) << (k * 8); + for (k = 0; k < SPD_PARTN_LENGTH; k++) + pDCTstat->DimmPartNumber[i][k] = mctRead_SPD(smbaddr, SPD_PARTN_START + k); ++ pDCTstat->DimmPartNumber[i][SPD_PARTN_LENGTH] = 0; + pDCTstat->DimmRevisionNumber[i] = 0; + for (k = 0; k < 2; k++) + pDCTstat->DimmRevisionNumber[i] |= ((uint16_t)mctRead_SPD(smbaddr, SPD_REVNO_START + k)) << (k * 8); +@@ -2206,8 +2207,7 @@ static u8 DIMMPresence_D(struct MCTStatStruc *pMCTstat, + if (byte & JED_REGADCMSK) { + RegDIMMPresent |= 1 << i; + pDCTstat->DimmRegistered[i] = 1; +- } +- else { ++ } else { + pDCTstat->DimmRegistered[i] = 0; + } + /* Check ECC capable */ +diff --git a/src/northbridge/amd/amdmct/mct/mct_d.h b/src/northbridge/amd/amdmct/mct/mct_d.h +index 132bdc9..6b6194d 100644 +--- a/src/northbridge/amd/amdmct/mct/mct_d.h ++++ b/src/northbridge/amd/amdmct/mct/mct_d.h +@@ -434,7 +434,7 @@ struct DCTStatStruc { /* A per Node structure*/ + /* CH A byte lane 0 - 7 maximum filtered window passing DQS delay value*/ + /* CH B byte lane 0 - 7 minimum filtered window passing DQS delay value*/ + /* CH B byte lane 0 - 7 maximum filtered window passing DQS delay value*/ +- u32 LogicalCPUID; /* The logical CPUID of the node*/ ++ uint64_t LogicalCPUID; /* The logical CPUID of the node*/ + u16 HostBiosSrvc1; /* Word sized general purpose field for use by host BIOS. Scratch space.*/ + u32 HostBiosSrvc2; /* Dword sized general purpose field for use by host BIOS. Scratch space.*/ + u16 DimmQRPresent; /* QuadRank DIMM present?*/ +@@ -529,7 +529,7 @@ struct DCTStatStruc { /* A per Node structure*/ + uint8_t DimmRegistered[MAX_DIMMS_SUPPORTED]; + + uint64_t DimmManufacturerID[MAX_DIMMS_SUPPORTED]; +- char DimmPartNumber[MAX_DIMMS_SUPPORTED][SPD_PARTN_LENGTH]; ++ char DimmPartNumber[MAX_DIMMS_SUPPORTED][SPD_PARTN_LENGTH+1]; + uint16_t DimmRevisionNumber[MAX_DIMMS_SUPPORTED]; + uint32_t DimmSerialNumber[MAX_DIMMS_SUPPORTED]; + } __attribute__((packed)); +@@ -598,17 +598,18 @@ struct DCTStatStruc { /* A per Node structure*/ + 266=266MHz (DDR533) + 333=333MHz (DDR667) + 400=400MHz (DDR800)*/ +-#define NV_ECC_CAP 4 /* Bus ECC capable (1-bits) ++#define NV_MIN_MEMCLK 4 /* Minimum platform demonstrated Memclock (10-bits) */ ++#define NV_ECC_CAP 5 /* Bus ECC capable (1-bits) + 0=Platform not capable + 1=Platform is capable*/ +-#define NV_4RANKType 5 /* Quad Rank DIMM slot type (2-bits) ++#define NV_4RANKType 6 /* Quad Rank DIMM slot type (2-bits) + 0=Normal + 1=R4 (4-Rank Registered DIMMs in AMD server configuration) + 2=S4 (Unbuffered SO-DIMMs)*/ +-#define NV_BYPMAX 6 /* Value to set DcqBypassMax field (See Function 2, Offset 94h, [27:24] of BKDG for field definition). ++#define NV_BYPMAX 7 /* Value to set DcqBypassMax field (See Function 2, Offset 94h, [27:24] of BKDG for field definition). + 4=4 times bypass (normal for non-UMA systems) + 7=7 times bypass (normal for UMA systems)*/ +-#define NV_RDWRQBYP 7 /* Value to set RdWrQByp field (See Function 2, Offset A0h, [3:2] of BKDG for field definition). ++#define NV_RDWRQBYP 8 /* Value to set RdWrQByp field (See Function 2, Offset A0h, [3:2] of BKDG for field definition). 
+ 2=8 times (normal for non-UMA systems) + 3=16 times (normal for UMA systems)*/ + +@@ -671,8 +672,9 @@ struct DCTStatStruc { /* A per Node structure*/ + #define NV_ECCRedir 54 /* Dram ECC Redirection enable*/ + #define NV_DramBKScrub 55 /* Dram ECC Background Scrubber CTL*/ + #define NV_L2BKScrub 56 /* L2 ECC Background Scrubber CTL*/ +-#define NV_DCBKScrub 57 /* DCache ECC Background Scrubber CTL*/ +-#define NV_CS_SpareCTL 58 /* Chip Select Spare Control bit 0: ++#define NV_L3BKScrub 57 /* L3 ECC Background Scrubber CTL*/ ++#define NV_DCBKScrub 58 /* DCache ECC Background Scrubber CTL*/ ++#define NV_CS_SpareCTL 59 /* Chip Select Spare Control bit 0: + 0=disable Spare + 1=enable Spare */ + /* Chip Select Spare Control bit 1-4: +@@ -712,7 +714,7 @@ u8 mct_Get_Start_RcvrEnDly_1Pass(u8 Pass); + u8 mct_Average_RcvrEnDly_Pass(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly, u8 RcvrEnDlyLimit, u8 Channel, u8 Receiver, u8 Pass); + void CPUMemTyping_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA); + void UMAMemTyping_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA); +-u32 mctGetLogicalCPUID(u32 Node); ++uint64_t mctGetLogicalCPUID(u32 Node); + u8 ECCInit_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA); + void TrainReceiverEn_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA, u8 Pass); + void mct_TrainDQSPos_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA); +diff --git a/src/northbridge/amd/amdmct/mct/mctpro_d.c b/src/northbridge/amd/amdmct/mct/mctpro_d.c +index c332357..fe56201 100644 +--- a/src/northbridge/amd/amdmct/mct/mctpro_d.c ++++ b/src/northbridge/amd/amdmct/mct/mctpro_d.c +@@ -2,6 +2,7 @@ + * This file is part of the coreboot project. + * + * Copyright (C) 2007 Advanced Micro Devices, Inc. ++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -23,7 +24,7 @@ void EarlySampleSupport_D(void) + + u32 procOdtWorkaround(struct DCTStatStruc *pDCTstat, u32 dct, u32 val) + { +- u32 tmp; ++ uint64_t tmp; + tmp = pDCTstat->LogicalCPUID; + if ((tmp == AMD_DR_A0A) || (tmp == AMD_DR_A1B) || (tmp == AMD_DR_A2)) { + val &= 0x0FFFFFFF; +@@ -42,7 +43,7 @@ u32 OtherTiming_A_D(struct DCTStatStruc *pDCTstat, u32 val) + * ( F2x[1, 0]8C[1:0] > 00b). Silicon Status: Fixed in Rev B + * FIXME: check if this is still required. + */ +- u32 tmp; ++ uint64_t tmp; + tmp = pDCTstat->LogicalCPUID; + if ((tmp == AMD_DR_A0A) || (tmp == AMD_DR_A1B) || (tmp == AMD_DR_A2)) { + if(!(val & (3<<12) )) +@@ -54,7 +55,7 @@ u32 OtherTiming_A_D(struct DCTStatStruc *pDCTstat, u32 val) + + void mct_ForceAutoPrecharge_D(struct DCTStatStruc *pDCTstat, u32 dct) + { +- u32 tmp; ++ uint64_t tmp; + u32 reg; + u32 reg_off; + u32 dev; +@@ -96,7 +97,7 @@ void mct_EndDQSTraining_D(struct MCTStatStruc *pMCTstat, + * FIXME: check this. 
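An editorial sketch, not from the patch: the u32 to uint64_t conversions in this file follow from the widened revision masks in amddefs.h. AMD_OR_C0 now occupies bit 32, so a u32 copy of pDCTstat->LogicalCPUID would silently truncate away the Family 15h revisions. A minimal mask test under that assumption:

	uint64_t cpu_rev = pDCTstat->LogicalCPUID;	/* 64-bit revision bitmask */
	if (cpu_rev & AMD_FAM15_ALL) {
		/* Family 15h (Orochi B2/C0) specific handling would go here */
	}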
+ */ + +- u32 tmp; ++ uint64_t tmp; + u32 dev; + u32 reg; + u32 val; +@@ -143,10 +144,9 @@ void mct_BeforeDQSTrain_Samp_D(struct MCTStatStruc *pMCTstat, + u32 index; + u32 reg; + u32 val; +- u32 tmp; ++ uint64_t tmp; + u32 Channel; + +- + tmp = pDCTstat->LogicalCPUID; + if ((tmp == AMD_DR_A0A) || (tmp == AMD_DR_A1B) || (tmp == AMD_DR_A2)) { + +@@ -206,7 +206,7 @@ u32 Modify_D3CMP(struct DCTStatStruc *pDCTstat, u32 dct, u32 value) + u32 index_reg; + u32 index; + u32 val; +- u32 tmp; ++ uint64_t tmp; + + tmp = pDCTstat->LogicalCPUID; + if ((tmp == AMD_DR_A0A) || (tmp == AMD_DR_A1B) || (tmp == AMD_DR_A2)) { +@@ -237,7 +237,7 @@ void SyncSetting(struct DCTStatStruc *pDCTstat) + * Silicon Status: Fix TBD + */ + +- u32 tmp; ++ uint64_t tmp; + tmp = pDCTstat->LogicalCPUID; + if ((tmp == AMD_DR_A0A) || (tmp == AMD_DR_A1B) || (tmp == AMD_DR_A2)) { + pDCTstat->CH_ODC_CTL[1] = pDCTstat->CH_ODC_CTL[0]; +@@ -278,7 +278,7 @@ u32 CheckNBCOFAutoPrechg(struct DCTStatStruc *pDCTstat, u32 dct) + + void mct_BeforeDramInit_D(struct DCTStatStruc *pDCTstat, u32 dct) + { +- u32 tmp; ++ uint64_t tmp; + u32 Speed; + u32 ch, ch_start, ch_end; + u32 index_reg; +@@ -286,7 +286,6 @@ void mct_BeforeDramInit_D(struct DCTStatStruc *pDCTstat, u32 dct) + u32 dev; + u32 val; + +- + tmp = pDCTstat->LogicalCPUID; + if ((tmp == AMD_DR_A0A) || (tmp == AMD_DR_A1B) || (tmp == AMD_DR_A2)) { + Speed = pDCTstat->Speed; +@@ -331,7 +330,7 @@ static u8 mct_checkFenceHoleAdjust_D(struct MCTStatStruc *pMCTstat, + u8 ChipSel, u8 *result) + { + u8 ByteLane; +- u32 tmp; ++ uint64_t tmp; + + tmp = pDCTstat->LogicalCPUID; + if ((tmp == AMD_DR_A0A) || (tmp == AMD_DR_A1B) || (tmp == AMD_DR_A2)) { +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c +index 12dfff1..74066b1 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c +@@ -75,6 +75,8 @@ static void StitchMemory_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct); + static u16 Get_Fk_D(u8 k); + static u8 Get_DIMMAddress_D(struct DCTStatStruc *pDCTstat, u8 i); ++static void mct_preInitDCT(struct MCTStatStruc *pMCTstat, ++ struct DCTStatStruc *pDCTstat); + static void mct_initDCT(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat); + static void mct_DramInit(struct MCTStatStruc *pMCTstat, +@@ -105,11 +107,11 @@ static void Get_TrwtTO(struct MCTStatStruc *pMCTstat, + static void Get_TrwtWB(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat); + static void Get_DqsRcvEnGross_Diff(struct DCTStatStruc *pDCTstat, +- u32 dev, u32 index_reg); ++ u32 dev, uint8_t dct, u32 index_reg); + static void Get_WrDatGross_Diff(struct DCTStatStruc *pDCTstat, u8 dct, + u32 dev, u32 index_reg); + static u16 Get_DqsRcvEnGross_MaxMin(struct DCTStatStruc *pDCTstat, +- u32 dev, u32 index_reg, u32 index); ++ u32 dev, uint8_t dct, u32 index_reg, u32 index); + static void mct_FinalMCT_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat); + static u16 Get_WrDatGross_MaxMin(struct DCTStatStruc *pDCTstat, u8 dct, +@@ -128,6 +130,8 @@ static void SetCKETriState(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct); + static void SetODTTriState(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct); ++static void InitDDRPhy(struct MCTStatStruc *pMCTstat, ++ struct DCTStatStruc *pDCTstat, u8 dct); + static void InitPhyCompensation(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct); + static u32 
mct_NodePresent_D(void); +@@ -138,7 +142,9 @@ static void mct_ResetDataStruct_D(struct MCTStatStruc *pMCTstat, + static void mct_EarlyArbEn_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct); + static void mct_BeforeDramInit_Prod_D(struct MCTStatStruc *pMCTstat, +- struct DCTStatStruc *pDCTstat); ++ struct DCTStatStruc *pDCTstat, u8 dct); ++static void mct_ProgramODT_D(struct MCTStatStruc *pMCTstat, ++ struct DCTStatStruc *pDCTstat, u8 dct); + void mct_ClrClToNB_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat); + static u8 CheckNBCOFEarlyArbEn(struct MCTStatStruc *pMCTstat, +@@ -158,6 +164,10 @@ static u32 mct_DisDllShutdownSR(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u32 DramConfigLo, u8 dct); + static void mct_EnDllShutdownSR(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct); ++static void ChangeMemClk(struct MCTStatStruc *pMCTstat, ++ struct DCTStatStruc *pDCTstat); ++void SetTargetFreq(struct MCTStatStruc *pMCTstat, ++ struct DCTStatStruc *pDCTstat); + + static u32 mct_MR1Odt_RDimm(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct, u32 MrsChipSel); +@@ -165,7 +175,8 @@ static u32 mct_DramTermDyn_RDimm(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dimm); + static u32 mct_SetDramConfigMisc2(struct DCTStatStruc *pDCTstat, u8 dct, u32 misc2); + static void mct_BeforeDQSTrainSamp(struct DCTStatStruc *pDCTstat); +-static void mct_WriteLevelization_HW(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA); ++static void mct_WriteLevelization_HW(struct MCTStatStruc *pMCTstat, ++ struct DCTStatStruc *pDCTstatA, uint8_t Pass); + static u8 Get_Latency_Diff(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct); + static void SyncSetting(struct DCTStatStruc *pDCTstat); +@@ -173,6 +184,12 @@ static u8 crcCheck(u8 smbaddr); + static void mct_ExtMCTConfig_Bx(struct DCTStatStruc *pDCTstat); + static void mct_ExtMCTConfig_Cx(struct DCTStatStruc *pDCTstat); + ++static void read_dqs_receiver_enable_control_registers(uint16_t* current_total_delay, ++ uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg); ++ ++static void read_dqs_write_timing_control_registers(uint16_t* current_total_delay, ++ uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg); ++ + /*See mctAutoInitMCT header for index relationships to CL and T*/ + static const u16 Table_F_k[] = {00,200,266,333,400,533 }; + static const u8 Tab_BankAddr[] = {0x3F,0x01,0x09,0x3F,0x3F,0x11,0x0A,0x19,0x12,0x1A,0x21,0x22,0x23}; +@@ -223,6 +240,936 @@ static const u8 Table_Comp_Rise_Slew_15x[] = {7, 7, 3, 2, 0xFF}; + static const u8 Table_Comp_Fall_Slew_20x[] = {7, 5, 3, 2, 0xFF}; + static const u8 Table_Comp_Fall_Slew_15x[] = {7, 7, 5, 3, 0xFF}; + ++static uint8_t dct_ddr_voltage_index(struct DCTStatStruc *pDCTstat, uint8_t dct) ++{ ++ uint8_t dimm; ++ uint8_t ddr_voltage_index = 0; ++ ++ /* Find current DDR supply voltage for this DCT */ ++ for (dimm = 0; dimm < MAX_DIMMS_SUPPORTED; dimm++) { ++ if (pDCTstat->DIMMValidDCT[dct] & (1 << dimm)) ++ ddr_voltage_index |= pDCTstat->DimmConfiguredVoltage[dimm]; ++ } ++ if (ddr_voltage_index > 0x7) { ++ printk(BIOS_DEBUG, "%s: Insufficient DDR supply voltage indicated! Configuring processor for 1.25V operation, but this attempt may fail...\n", __func__); ++ ddr_voltage_index = 0x4; ++ } ++ if (ddr_voltage_index == 0x0) { ++ printk(BIOS_DEBUG, "%s: No DDR supply voltage indicated! 
Configuring processor for 1.5V operation, but this attempt may fail...\n", __func__); ++ ddr_voltage_index = 0x1; ++ } ++ ++ return ddr_voltage_index; ++} ++ ++static uint16_t fam15h_mhz_to_memclk_config(uint16_t freq) ++{ ++ uint16_t fam15h_freq_tab[] = {0, 0, 0, 0, 333, 0, 400, 0, 0, 0, 533, 0, 0, 0, 667, 0, 0, 0, 800, 0, 0, 0, 933}; ++ uint16_t iter; ++ ++ /* Compute the index value for the given frequency */ ++ for (iter = 0; iter <= 0x16; iter++) { ++ if (fam15h_freq_tab[iter] == freq) ++ break; ++ } ++ if (fam15h_freq_tab[iter] == freq) ++ freq = iter; ++ if (freq == 0) ++ freq = 0x4; ++ ++ return freq; ++} ++ ++static uint16_t fam10h_mhz_to_memclk_config(uint16_t freq) ++{ ++ uint16_t fam10h_freq_tab[] = {0, 0, 0, 400, 533, 667, 800}; ++ uint16_t iter; ++ ++ /* Compute the index value for the given frequency */ ++ for (iter = 0; iter <= 0x6; iter++) { ++ if (fam10h_freq_tab[iter] == freq) ++ break; ++ } ++ if (fam10h_freq_tab[iter] == freq) ++ freq = iter; ++ if (freq == 0) ++ freq = 0x3; ++ ++ return freq; ++} ++ ++static uint16_t mhz_to_memclk_config(uint16_t freq) ++{ ++ if (is_fam15h()) ++ return fam15h_mhz_to_memclk_config(freq); ++ else ++ return fam10h_mhz_to_memclk_config(freq) + 1; ++} ++ ++static uint32_t fam15h_phy_predriver_calibration_code(struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t drive_strength) ++{ ++ uint8_t lrdimm = 0; ++ uint8_t package_type; ++ uint8_t ddr_voltage_index; ++ uint32_t calibration_code = 0; ++ uint16_t MemClkFreq = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x94) & 0x1f; ++ ++ ddr_voltage_index = dct_ddr_voltage_index(pDCTstat, dct); ++ package_type = mctGet_NVbits(NV_PACK_TYPE); ++ ++ if (!lrdimm) { ++ /* Not an LRDIMM */ ++ if ((package_type == PT_M2) || (package_type == PT_GR)) { ++ /* Socket AM3 or G34 */ ++ if (ddr_voltage_index & 0x4) { ++ /* 1.25V */ ++ /* Fam15h BKDG Rev. 3.14 section 2.10.5.3.4 Table 43 */ ++ if ((MemClkFreq == 0x4) || (MemClkFreq == 0x6)) { ++ /* DDR3-667 - DDR3-800 */ ++ if (drive_strength == 0x0) ++ calibration_code = 0xfff; ++ else if (drive_strength == 0x1) ++ calibration_code = 0xb6d; ++ else if (drive_strength == 0x2) ++ calibration_code = 0x924; ++ else if (drive_strength == 0x3) ++ calibration_code = 0x6db; ++ } else if ((MemClkFreq == 0xa) || (MemClkFreq == 0xe)) { ++ /* DDR3-1066 - DDR3-1333 */ ++ if (drive_strength == 0x0) ++ calibration_code = 0xfff; ++ else if (drive_strength == 0x1) ++ calibration_code = 0xfff; ++ else if (drive_strength == 0x2) ++ calibration_code = 0xdb6; ++ else if (drive_strength == 0x3) ++ calibration_code = 0x924; ++ } else if ((MemClkFreq == 0x12) || (MemClkFreq == 0x16)) { ++ /* DDR3-1600 - DDR3-1866 */ ++ if (drive_strength == 0x0) ++ calibration_code = 0xfff; ++ else if (drive_strength == 0x1) ++ calibration_code = 0xfff; ++ else if (drive_strength == 0x2) ++ calibration_code = 0xfff; ++ else if (drive_strength == 0x3) ++ calibration_code = 0xfff; ++ } ++ } ++ else if (ddr_voltage_index & 0x2) { ++ /* 1.35V */ ++ /* Fam15h BKDG Rev. 
3.14 section 2.10.5.3.4 Table 42 */ ++ if ((MemClkFreq == 0x4) || (MemClkFreq == 0x6)) { ++ /* DDR3-667 - DDR3-800 */ ++ if (drive_strength == 0x0) ++ calibration_code = 0xfff; ++ else if (drive_strength == 0x1) ++ calibration_code = 0x924; ++ else if (drive_strength == 0x2) ++ calibration_code = 0x6db; ++ else if (drive_strength == 0x3) ++ calibration_code = 0x492; ++ } else if ((MemClkFreq == 0xa) || (MemClkFreq == 0xe)) { ++ /* DDR3-1066 - DDR3-1333 */ ++ if (drive_strength == 0x0) ++ calibration_code = 0xfff; ++ else if (drive_strength == 0x1) ++ calibration_code = 0xdb6; ++ else if (drive_strength == 0x2) ++ calibration_code = 0xbd6; ++ else if (drive_strength == 0x3) ++ calibration_code = 0x6db; ++ } else if ((MemClkFreq == 0x12) || (MemClkFreq == 0x16)) { ++ /* DDR3-1600 - DDR3-1866 */ ++ if (drive_strength == 0x0) ++ calibration_code = 0xfff; ++ else if (drive_strength == 0x1) ++ calibration_code = 0xfff; ++ else if (drive_strength == 0x2) ++ calibration_code = 0xfff; ++ else if (drive_strength == 0x3) ++ calibration_code = 0xdb6; ++ } ++ } ++ else if (ddr_voltage_index & 0x1) { ++ /* 1.5V */ ++ /* Fam15h BKDG Rev. 3.14 section 2.10.5.3.4 Table 41 */ ++ if ((MemClkFreq == 0x4) || (MemClkFreq == 0x6)) { ++ /* DDR3-667 - DDR3-800 */ ++ if (drive_strength == 0x0) ++ calibration_code = 0xb6d; ++ else if (drive_strength == 0x1) ++ calibration_code = 0x6db; ++ else if (drive_strength == 0x2) ++ calibration_code = 0x492; ++ else if (drive_strength == 0x3) ++ calibration_code = 0x492; ++ } else if ((MemClkFreq == 0xa) || (MemClkFreq == 0xe)) { ++ /* DDR3-1066 - DDR3-1333 */ ++ if (drive_strength == 0x0) ++ calibration_code = 0xfff; ++ else if (drive_strength == 0x1) ++ calibration_code = 0x924; ++ else if (drive_strength == 0x2) ++ calibration_code = 0x6db; ++ else if (drive_strength == 0x3) ++ calibration_code = 0x6db; ++ } else if ((MemClkFreq == 0x12) || (MemClkFreq == 0x16)) { ++ /* DDR3-1600 - DDR3-1866 */ ++ if (drive_strength == 0x0) ++ calibration_code = 0xfff; ++ else if (drive_strength == 0x1) ++ calibration_code = 0xfff; ++ else if (drive_strength == 0x2) ++ calibration_code = 0xfff; ++ else if (drive_strength == 0x3) ++ calibration_code = 0xb6d; ++ } ++ } ++ } ++ else if (package_type == PT_C3) { ++ /* Socket C32 */ ++ if (ddr_voltage_index & 0x4) { ++ /* 1.25V */ ++ /* Fam15h BKDG Rev. 
3.14 section 2.10.5.3.4 Table 46 */ ++ if ((MemClkFreq == 0x4) || (MemClkFreq == 0x6)) { ++ /* DDR3-667 - DDR3-800 */ ++ if (drive_strength == 0x0) ++ calibration_code = 0xfff; ++ else if (drive_strength == 0x1) ++ calibration_code = 0xb6d; ++ else if (drive_strength == 0x2) ++ calibration_code = 0x924; ++ else if (drive_strength == 0x3) ++ calibration_code = 0x6db; ++ } else if (MemClkFreq == 0xa) { ++ /* DDR3-1066 */ ++ if (drive_strength == 0x0) ++ calibration_code = 0xfff; ++ else if (drive_strength == 0x1) ++ calibration_code = 0xfff; ++ else if (drive_strength == 0x2) ++ calibration_code = 0xdb6; ++ else if (drive_strength == 0x3) ++ calibration_code = 0x924; ++ } else if (MemClkFreq == 0xe) { ++ /* DDR3-1333 */ ++ if (drive_strength == 0x0) ++ calibration_code = 0xb6d; ++ else if (drive_strength == 0x1) ++ calibration_code = 0x6db; ++ else if (drive_strength == 0x2) ++ calibration_code = 0x492; ++ else if (drive_strength == 0x3) ++ calibration_code = 0x492; ++ } else if ((MemClkFreq == 0x12) || (MemClkFreq == 0x16)) { ++ /* DDR3-1600 - DDR3-1866 */ ++ if (drive_strength == 0x0) ++ calibration_code = 0xfff; ++ else if (drive_strength == 0x1) ++ calibration_code = 0xfff; ++ else if (drive_strength == 0x2) ++ calibration_code = 0xfff; ++ else if (drive_strength == 0x3) ++ calibration_code = 0xfff; ++ } ++ } ++ else if (ddr_voltage_index & 0x2) { ++ /* 1.35V */ ++ /* Fam15h BKDG Rev. 3.14 section 2.10.5.3.4 Table 45 */ ++ if ((MemClkFreq == 0x4) || (MemClkFreq == 0x6)) { ++ /* DDR3-667 - DDR3-800 */ ++ if (drive_strength == 0x0) ++ calibration_code = 0xfff; ++ else if (drive_strength == 0x1) ++ calibration_code = 0x924; ++ else if (drive_strength == 0x2) ++ calibration_code = 0x6db; ++ else if (drive_strength == 0x3) ++ calibration_code = 0x492; ++ } else if (MemClkFreq == 0xa) { ++ /* DDR3-1066 */ ++ if (drive_strength == 0x0) ++ calibration_code = 0xfff; ++ else if (drive_strength == 0x1) ++ calibration_code = 0xdb6; ++ else if (drive_strength == 0x2) ++ calibration_code = 0xb6d; ++ else if (drive_strength == 0x3) ++ calibration_code = 0x6db; ++ } else if (MemClkFreq == 0xe) { ++ /* DDR3-1333 */ ++ if (drive_strength == 0x0) ++ calibration_code = 0xfff; ++ else if (drive_strength == 0x1) ++ calibration_code = 0x924; ++ else if (drive_strength == 0x2) ++ calibration_code = 0x6db; ++ else if (drive_strength == 0x3) ++ calibration_code = 0x492; ++ } else if ((MemClkFreq == 0x12) || (MemClkFreq == 0x16)) { ++ /* DDR3-1600 - DDR3-1866 */ ++ if (drive_strength == 0x0) ++ calibration_code = 0xfff; ++ else if (drive_strength == 0x1) ++ calibration_code = 0xfff; ++ else if (drive_strength == 0x2) ++ calibration_code = 0xfff; ++ else if (drive_strength == 0x3) ++ calibration_code = 0xdb6; ++ } ++ } ++ else if (ddr_voltage_index & 0x1) { ++ /* 1.5V */ ++ /* Fam15h BKDG Rev. 
3.14 section 2.10.5.3.4 Table 44 */ ++ if ((MemClkFreq == 0x4) || (MemClkFreq == 0x6)) { ++ /* DDR3-667 - DDR3-800 */ ++ if (drive_strength == 0x0) ++ calibration_code = 0xb6d; ++ else if (drive_strength == 0x1) ++ calibration_code = 0x6db; ++ else if (drive_strength == 0x2) ++ calibration_code = 0x492; ++ else if (drive_strength == 0x3) ++ calibration_code = 0x492; ++ } else if (MemClkFreq == 0xa) { ++ /* DDR3-1066 */ ++ if (drive_strength == 0x0) ++ calibration_code = 0xfff; ++ else if (drive_strength == 0x1) ++ calibration_code = 0x924; ++ else if (drive_strength == 0x2) ++ calibration_code = 0x6db; ++ else if (drive_strength == 0x3) ++ calibration_code = 0x6db; ++ } else if (MemClkFreq == 0xe) { ++ /* DDR3-1333 */ ++ if (drive_strength == 0x0) ++ calibration_code = 0xb6d; ++ else if (drive_strength == 0x1) ++ calibration_code = 0x6db; ++ else if (drive_strength == 0x2) ++ calibration_code = 0x492; ++ else if (drive_strength == 0x3) ++ calibration_code = 0x492; ++ } else if ((MemClkFreq == 0x12) || (MemClkFreq == 0x16)) { ++ /* DDR3-1600 - DDR3-1866 */ ++ if (drive_strength == 0x0) ++ calibration_code = 0xfff; ++ else if (drive_strength == 0x1) ++ calibration_code = 0xfff; ++ else if (drive_strength == 0x2) ++ calibration_code = 0xfff; ++ else if (drive_strength == 0x3) ++ calibration_code = 0xb6d; ++ } ++ } ++ } ++ } else { ++ /* LRDIMM */ ++ ++ /* TODO ++ * Implement LRDIMM support ++ * See Fam15h BKDG Rev. 3.14 section 2.10.5.3.4 Tables 47 - 49 ++ */ ++ } ++ ++ return calibration_code; ++} ++ ++static uint32_t fam15h_phy_predriver_cmd_addr_calibration_code(struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t drive_strength) ++{ ++ uint8_t ddr_voltage_index; ++ uint32_t calibration_code = 0; ++ uint16_t MemClkFreq = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x94) & 0x1f; ++ ++ ddr_voltage_index = dct_ddr_voltage_index(pDCTstat, dct); ++ ++ if (ddr_voltage_index & 0x4) { ++ /* 1.25V */ ++ /* Fam15h BKDG Rev. 3.14 section 2.10.5.3.4 Table 52 */ ++ if ((MemClkFreq == 0x4) || (MemClkFreq == 0x6)) { ++ /* DDR3-667 - DDR3-800 */ ++ if (drive_strength == 0x0) ++ calibration_code = 0x492; ++ else if (drive_strength == 0x1) ++ calibration_code = 0x492; ++ else if (drive_strength == 0x2) ++ calibration_code = 0x492; ++ else if (drive_strength == 0x3) ++ calibration_code = 0x492; ++ } else if ((MemClkFreq == 0xa) || (MemClkFreq == 0xe)) { ++ /* DDR3-1066 - DDR3-1333 */ ++ if (drive_strength == 0x0) ++ calibration_code = 0xdad; ++ else if (drive_strength == 0x1) ++ calibration_code = 0x924; ++ else if (drive_strength == 0x2) ++ calibration_code = 0x6db; ++ else if (drive_strength == 0x3) ++ calibration_code = 0x492; ++ } else if ((MemClkFreq == 0x12) || (MemClkFreq == 0x16)) { ++ /* DDR3-1600 - DDR3-1866 */ ++ if (drive_strength == 0x0) ++ calibration_code = 0xff6; ++ else if (drive_strength == 0x1) ++ calibration_code = 0xdad; ++ else if (drive_strength == 0x2) ++ calibration_code = 0xb64; ++ else if (drive_strength == 0x3) ++ calibration_code = 0xb64; ++ } ++ } ++ else if (ddr_voltage_index & 0x2) { ++ /* 1.35V */ ++ /* Fam15h BKDG Rev. 
3.14 section 2.10.5.3.4 Table 51 */ ++ if ((MemClkFreq == 0x4) || (MemClkFreq == 0x6)) { ++ /* DDR3-667 - DDR3-800 */ ++ if (drive_strength == 0x0) ++ calibration_code = 0x492; ++ else if (drive_strength == 0x1) ++ calibration_code = 0x492; ++ else if (drive_strength == 0x2) ++ calibration_code = 0x492; ++ else if (drive_strength == 0x3) ++ calibration_code = 0x492; ++ } else if ((MemClkFreq == 0xa) || (MemClkFreq == 0xe)) { ++ /* DDR3-1066 - DDR3-1333 */ ++ if (drive_strength == 0x0) ++ calibration_code = 0x924; ++ else if (drive_strength == 0x1) ++ calibration_code = 0x6db; ++ else if (drive_strength == 0x2) ++ calibration_code = 0x6db; ++ else if (drive_strength == 0x3) ++ calibration_code = 0x6db; ++ } else if ((MemClkFreq == 0x12) || (MemClkFreq == 0x16)) { ++ /* DDR3-1600 - DDR3-1866 */ ++ if (drive_strength == 0x0) ++ calibration_code = 0xb6d; ++ else if (drive_strength == 0x1) ++ calibration_code = 0xb6d; ++ else if (drive_strength == 0x2) ++ calibration_code = 0x924; ++ else if (drive_strength == 0x3) ++ calibration_code = 0x924; ++ } ++ } ++ else if (ddr_voltage_index & 0x1) { ++ /* 1.5V */ ++ /* Fam15h BKDG Rev. 3.14 section 2.10.5.3.4 Table 50 */ ++ if ((MemClkFreq == 0x4) || (MemClkFreq == 0x6)) { ++ /* DDR3-667 - DDR3-800 */ ++ if (drive_strength == 0x0) ++ calibration_code = 0x492; ++ else if (drive_strength == 0x1) ++ calibration_code = 0x492; ++ else if (drive_strength == 0x2) ++ calibration_code = 0x492; ++ else if (drive_strength == 0x3) ++ calibration_code = 0x492; ++ } else if ((MemClkFreq == 0xa) || (MemClkFreq == 0xe)) { ++ /* DDR3-1066 - DDR3-1333 */ ++ if (drive_strength == 0x0) ++ calibration_code = 0x6db; ++ else if (drive_strength == 0x1) ++ calibration_code = 0x6db; ++ else if (drive_strength == 0x2) ++ calibration_code = 0x6db; ++ else if (drive_strength == 0x3) ++ calibration_code = 0x6db; ++ } else if ((MemClkFreq == 0x12) || (MemClkFreq == 0x16)) { ++ /* DDR3-1600 - DDR3-1866 */ ++ if (drive_strength == 0x0) ++ calibration_code = 0xb6d; ++ else if (drive_strength == 0x1) ++ calibration_code = 0xb6d; ++ else if (drive_strength == 0x2) ++ calibration_code = 0xb6d; ++ else if (drive_strength == 0x3) ++ calibration_code = 0xb6d; ++ } ++ } ++ ++ return calibration_code; ++} ++ ++static uint32_t fam15h_phy_predriver_clk_calibration_code(struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t drive_strength) ++{ ++ uint8_t ddr_voltage_index; ++ uint32_t calibration_code = 0; ++ uint16_t MemClkFreq = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x94) & 0x1f; ++ ++ ddr_voltage_index = dct_ddr_voltage_index(pDCTstat, dct); ++ ++ if (ddr_voltage_index & 0x4) { ++ /* 1.25V */ ++ /* Fam15h BKDG Rev. 
3.14 section 2.10.5.3.4 Table 55 */ ++ if ((MemClkFreq == 0x4) || (MemClkFreq == 0x6)) { ++ /* DDR3-667 - DDR3-800 */ ++ if (drive_strength == 0x0) ++ calibration_code = 0xdad; ++ else if (drive_strength == 0x1) ++ calibration_code = 0xdad; ++ else if (drive_strength == 0x2) ++ calibration_code = 0x924; ++ else if (drive_strength == 0x3) ++ calibration_code = 0x924; ++ } else if ((MemClkFreq == 0xa) || (MemClkFreq == 0xe)) { ++ /* DDR3-1066 - DDR3-1333 */ ++ if (drive_strength == 0x0) ++ calibration_code = 0xff6; ++ else if (drive_strength == 0x1) ++ calibration_code = 0xff6; ++ else if (drive_strength == 0x2) ++ calibration_code = 0xff6; ++ else if (drive_strength == 0x3) ++ calibration_code = 0xff6; ++ } else if ((MemClkFreq == 0x12) || (MemClkFreq == 0x16)) { ++ /* DDR3-1600 - DDR3-1866 */ ++ if (drive_strength == 0x0) ++ calibration_code = 0xff6; ++ else if (drive_strength == 0x1) ++ calibration_code = 0xff6; ++ else if (drive_strength == 0x2) ++ calibration_code = 0xff6; ++ else if (drive_strength == 0x3) ++ calibration_code = 0xff6; ++ } ++ } ++ else if (ddr_voltage_index & 0x2) { ++ /* 1.35V */ ++ /* Fam15h BKDG Rev. 3.14 section 2.10.5.3.4 Table 54 */ ++ if ((MemClkFreq == 0x4) || (MemClkFreq == 0x6)) { ++ /* DDR3-667 - DDR3-800 */ ++ if (drive_strength == 0x0) ++ calibration_code = 0xdad; ++ else if (drive_strength == 0x1) ++ calibration_code = 0xdad; ++ else if (drive_strength == 0x2) ++ calibration_code = 0x924; ++ else if (drive_strength == 0x3) ++ calibration_code = 0x924; ++ } else if ((MemClkFreq == 0xa) || (MemClkFreq == 0xe)) { ++ /* DDR3-1066 - DDR3-1333 */ ++ if (drive_strength == 0x0) ++ calibration_code = 0xff6; ++ else if (drive_strength == 0x1) ++ calibration_code = 0xff6; ++ else if (drive_strength == 0x2) ++ calibration_code = 0xff6; ++ else if (drive_strength == 0x3) ++ calibration_code = 0xdad; ++ } else if ((MemClkFreq == 0x12) || (MemClkFreq == 0x16)) { ++ /* DDR3-1600 - DDR3-1866 */ ++ if (drive_strength == 0x0) ++ calibration_code = 0xff6; ++ else if (drive_strength == 0x1) ++ calibration_code = 0xff6; ++ else if (drive_strength == 0x2) ++ calibration_code = 0xff6; ++ else if (drive_strength == 0x3) ++ calibration_code = 0xdad; ++ } ++ } ++ else if (ddr_voltage_index & 0x1) { ++ /* 1.5V */ ++ /* Fam15h BKDG Rev. 
3.14 section 2.10.5.3.4 Table 53 */ ++ if ((MemClkFreq == 0x4) || (MemClkFreq == 0x6)) { ++ /* DDR3-667 - DDR3-800 */ ++ if (drive_strength == 0x0) ++ calibration_code = 0x924; ++ else if (drive_strength == 0x1) ++ calibration_code = 0x924; ++ else if (drive_strength == 0x2) ++ calibration_code = 0x924; ++ else if (drive_strength == 0x3) ++ calibration_code = 0x924; ++ } else if ((MemClkFreq == 0xa) || (MemClkFreq == 0xe)) { ++ /* DDR3-1066 - DDR3-1333 */ ++ if (drive_strength == 0x0) ++ calibration_code = 0xff6; ++ else if (drive_strength == 0x1) ++ calibration_code = 0xff6; ++ else if (drive_strength == 0x2) ++ calibration_code = 0xff6; ++ else if (drive_strength == 0x3) ++ calibration_code = 0xb6d; ++ } else if ((MemClkFreq == 0x12) || (MemClkFreq == 0x16)) { ++ /* DDR3-1600 - DDR3-1866 */ ++ if (drive_strength == 0x0) ++ calibration_code = 0xff6; ++ else if (drive_strength == 0x1) ++ calibration_code = 0xff6; ++ else if (drive_strength == 0x2) ++ calibration_code = 0xff6; ++ else if (drive_strength == 0x3) ++ calibration_code = 0xff6; ++ } ++ } ++ ++ return calibration_code; ++} ++ ++static uint32_t fam15h_output_driver_compensation_code(struct DCTStatStruc *pDCTstat, uint8_t dct) ++{ ++ /* FIXME ++ * Mainboards need to be able to specify the maximum number of DIMMs installable per channel ++ * For now assume a maximum of 2 DIMMs per channel can be installed ++ */ ++ uint8_t MaxDimmsInstallable = 2; ++ ++ uint8_t package_type; ++ uint32_t calibration_code = 0; ++ ++ package_type = mctGet_NVbits(NV_PACK_TYPE); ++ uint16_t MemClkFreq = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x94) & 0x1f; ++ ++ /* Obtain number of DIMMs on channel */ ++ uint8_t dimm_count = pDCTstat->MAdimms[dct]; ++ uint8_t rank_count_dimm0; ++ uint8_t rank_count_dimm1; ++ ++ if (package_type == PT_GR) { ++ /* Socket G34 */ ++ /* Fam15h BKDG Rev. 
3.14 section 2.10.5.3.4 Table 73 */ ++ if (MaxDimmsInstallable == 1) { ++ if (MemClkFreq == 0x4) { ++ /* DDR3-667 */ ++ calibration_code = 0x00112222; ++ } ++ else if (MemClkFreq == 0x6) { ++ /* DDR3-800 */ ++ calibration_code = 0x10112222; ++ } ++ else if (MemClkFreq == 0xa) { ++ /* DDR3-1066 */ ++ calibration_code = 0x20112222; ++ } ++ else if ((MemClkFreq == 0xe) || (MemClkFreq == 0x12)) { ++ /* DDR3-1333 - DDR3-1600 */ ++ calibration_code = 0x30112222; ++ } ++ else if (MemClkFreq == 0x16) { ++ /* DDR3-1866 */ ++ calibration_code = 0x30332222; ++ } ++ } else if (MaxDimmsInstallable == 2) { ++ if (dimm_count == 1) { ++ /* 1 DIMM detected */ ++ if (MemClkFreq == 0x4) { ++ /* DDR3-667 */ ++ calibration_code = 0x00112222; ++ } ++ else if (MemClkFreq == 0x6) { ++ /* DDR3-800 */ ++ calibration_code = 0x10112222; ++ } ++ else if (MemClkFreq == 0xa) { ++ /* DDR3-1066 */ ++ calibration_code = 0x20112222; ++ } ++ else if ((MemClkFreq == 0xe) || (MemClkFreq == 0x12)) { ++ /* DDR3-1333 - DDR3-1600 */ ++ calibration_code = 0x30112222; ++ } ++ } else if (dimm_count == 2) { ++ /* 2 DIMMs detected */ ++ rank_count_dimm0 = pDCTstat->C_DCTPtr[dct]->DimmRanks[0]; ++ rank_count_dimm1 = pDCTstat->C_DCTPtr[dct]->DimmRanks[1]; ++ ++ if (MemClkFreq == 0x4) { ++ /* DDR3-667 */ ++ calibration_code = 0x10222222; ++ } ++ else if (MemClkFreq == 0x6) { ++ /* DDR3-800 */ ++ calibration_code = 0x20222222; ++ } ++ else if (MemClkFreq == 0xa) { ++ /* DDR3-1066 */ ++ calibration_code = 0x30222222; ++ } ++ else if (MemClkFreq == 0xe) { ++ /* DDR3-1333 */ ++ calibration_code = 0x30222222; ++ } ++ else if (MemClkFreq == 0x12) { ++ /* DDR3-1600 */ ++ if ((rank_count_dimm0 == 1) && (rank_count_dimm1 == 1)) ++ calibration_code = 0x30222222; ++ else ++ calibration_code = 0x30112222; ++ } ++ } ++ } else if (MaxDimmsInstallable == 3) { ++ /* TODO ++ * 3 DIMM/channel support unimplemented ++ */ ++ } ++ } else { ++ /* TODO ++ * Other socket support unimplemented ++ */ ++ } ++ ++ return calibration_code; ++} ++ ++static uint32_t fam15h_address_timing_compensation_code(struct DCTStatStruc *pDCTstat, uint8_t dct) ++{ ++ /* FIXME ++ * Mainboards need to be able to specify the maximum number of DIMMs installable per channel ++ * For now assume a maximum of 2 DIMMs per channel can be installed ++ */ ++ uint8_t MaxDimmsInstallable = 2; ++ ++ uint8_t package_type; ++ uint32_t calibration_code = 0; ++ ++ package_type = mctGet_NVbits(NV_PACK_TYPE); ++ uint16_t MemClkFreq = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x94) & 0x1f; ++ ++ /* Obtain number of DIMMs on channel */ ++ uint8_t dimm_count = pDCTstat->MAdimms[dct]; ++ uint8_t rank_count_dimm0; ++ uint8_t rank_count_dimm1; ++ ++ if (package_type == PT_GR) { ++ /* Socket G34 */ ++ /* Fam15h BKDG Rev. 
3.14 section 2.10.5.3.4 Table 73 */ ++ if (MaxDimmsInstallable == 1) { ++ rank_count_dimm0 = pDCTstat->C_DCTPtr[dct]->DimmRanks[1]; ++ ++ if (MemClkFreq == 0x4) { ++ /* DDR3-667 */ ++ if (rank_count_dimm0 == 1) ++ calibration_code = 0x00000000; ++ else ++ calibration_code = 0x003b0000; ++ } else if (MemClkFreq == 0x6) { ++ /* DDR3-800 */ ++ if (rank_count_dimm0 == 1) ++ calibration_code = 0x00000000; ++ else ++ calibration_code = 0x003b0000; ++ } else if (MemClkFreq == 0xa) { ++ /* DDR3-1066 */ ++ calibration_code = 0x00383837; ++ } else if (MemClkFreq == 0xe) { ++ /* DDR3-1333 */ ++ calibration_code = 0x00363635; ++ } else if (MemClkFreq == 0x12) { ++ /* DDR3-1600 */ ++ if (rank_count_dimm0 == 1) ++ calibration_code = 0x00353533; ++ else ++ calibration_code = 0x00003533; ++ } else if (MemClkFreq == 0x16) { ++ /* DDR3-1866 */ ++ calibration_code = 0x00333330; ++ } ++ } else if (MaxDimmsInstallable == 2) { ++ if (dimm_count == 1) { ++ /* 1 DIMM detected */ ++ rank_count_dimm0 = pDCTstat->C_DCTPtr[dct]->DimmRanks[1]; ++ ++ if (MemClkFreq == 0x4) { ++ /* DDR3-667 */ ++ if (rank_count_dimm0 == 1) ++ calibration_code = 0x00000000; ++ else ++ calibration_code = 0x003b0000; ++ } else if (MemClkFreq == 0x6) { ++ /* DDR3-800 */ ++ if (rank_count_dimm0 == 1) ++ calibration_code = 0x00000000; ++ else ++ calibration_code = 0x003b0000; ++ } else if (MemClkFreq == 0xa) { ++ /* DDR3-1066 */ ++ calibration_code = 0x00383837; ++ } else if (MemClkFreq == 0xe) { ++ /* DDR3-1333 */ ++ calibration_code = 0x00363635; ++ } else if (MemClkFreq == 0x12) { ++ /* DDR3-1600 */ ++ if (rank_count_dimm0 == 1) ++ calibration_code = 0x00353533; ++ else ++ calibration_code = 0x00003533; ++ } ++ } else if (dimm_count == 2) { ++ /* 2 DIMMs detected */ ++ rank_count_dimm0 = pDCTstat->C_DCTPtr[dct]->DimmRanks[0]; ++ rank_count_dimm1 = pDCTstat->C_DCTPtr[dct]->DimmRanks[1]; ++ ++ if (MemClkFreq == 0x4) { ++ /* DDR3-667 */ ++ calibration_code = 0x00390039; ++ } else if (MemClkFreq == 0x6) { ++ /* DDR3-800 */ ++ calibration_code = 0x00390039; ++ } else if (MemClkFreq == 0xa) { ++ /* DDR3-1066 */ ++ calibration_code = 0x003a3a3a; ++ } else if (MemClkFreq == 0xe) { ++ /* DDR3-1333 */ ++ calibration_code = 0x00003939; ++ } else if (MemClkFreq == 0x12) { ++ /* DDR3-1600 */ ++ if ((rank_count_dimm0 == 1) && (rank_count_dimm1 == 1)) ++ calibration_code = 0x00003738; ++ } ++ } ++ } else if (MaxDimmsInstallable == 3) { ++ /* TODO ++ * 3 DIMM/channel support unimplemented ++ */ ++ } ++ } else { ++ /* TODO ++ * Other socket support unimplemented ++ */ ++ } ++ ++ return calibration_code; ++} ++ ++static uint8_t fam15h_slow_access_mode(struct DCTStatStruc *pDCTstat, uint8_t dct) ++{ ++ /* FIXME ++ * Mainboards need to be able to specify the maximum number of DIMMs installable per channel ++ * For now assume a maximum of 2 DIMMs per channel can be installed ++ */ ++ uint8_t MaxDimmsInstallable = 2; ++ ++ uint8_t package_type; ++ uint32_t slow_access = 0; ++ ++ package_type = mctGet_NVbits(NV_PACK_TYPE); ++ uint16_t MemClkFreq = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x94) & 0x1f; ++ ++ /* Obtain number of DIMMs on channel */ ++ uint8_t dimm_count = pDCTstat->MAdimms[dct]; ++ uint8_t rank_count_dimm0; ++ uint8_t rank_count_dimm1; ++ ++ if (package_type == PT_GR) { ++ /* Socket G34 */ ++ /* Fam15h BKDG Rev. 
3.14 section 2.10.5.3.4 Table 73 */ ++ if (MaxDimmsInstallable == 1) { ++ rank_count_dimm0 = pDCTstat->C_DCTPtr[dct]->DimmRanks[1]; ++ ++ if ((MemClkFreq == 0x4) || (MemClkFreq == 0x6) ++ || (MemClkFreq == 0xa) | (MemClkFreq == 0xe)) { ++ /* DDR3-667 - DDR3-1333 */ ++ slow_access = 0; ++ } else if ((MemClkFreq == 0x12) || (MemClkFreq == 0x16)) { ++ /* DDR3-1600 - DDR3-1866 */ ++ if (rank_count_dimm0 == 1) ++ slow_access = 0; ++ else ++ slow_access = 1; ++ } ++ } else if (MaxDimmsInstallable == 2) { ++ if (dimm_count == 1) { ++ /* 1 DIMM detected */ ++ rank_count_dimm0 = pDCTstat->C_DCTPtr[dct]->DimmRanks[1]; ++ ++ if ((MemClkFreq == 0x4) || (MemClkFreq == 0x6) ++ || (MemClkFreq == 0xa) | (MemClkFreq == 0xe)) { ++ /* DDR3-667 - DDR3-1333 */ ++ slow_access = 0; ++ } ++ else if (MemClkFreq == 0x12) { ++ /* DDR3-1600 */ ++ if (rank_count_dimm0 == 1) ++ slow_access = 0; ++ else ++ slow_access = 1; ++ } ++ } else if (dimm_count == 2) { ++ /* 2 DIMMs detected */ ++ rank_count_dimm0 = pDCTstat->C_DCTPtr[dct]->DimmRanks[0]; ++ rank_count_dimm1 = pDCTstat->C_DCTPtr[dct]->DimmRanks[1]; ++ ++ if ((MemClkFreq == 0x4) || (MemClkFreq == 0x6) ++ || (MemClkFreq == 0xa)) { ++ /* DDR3-667 - DDR3-1066 */ ++ slow_access = 0; ++ } ++ else if ((MemClkFreq == 0xe) || (MemClkFreq == 0x12)) { ++ /* DDR3-1333 - DDR3-1600 */ ++ slow_access = 1; ++ } ++ } ++ } else if (MaxDimmsInstallable == 3) { ++ /* TODO ++ * 3 DIMM/channel support unimplemented ++ */ ++ } ++ } else { ++ /* TODO ++ * Other socket support unimplemented ++ */ ++ } ++ ++ return slow_access; ++} ++ ++static void set_2t_configuration(struct MCTStatStruc *pMCTstat, ++ struct DCTStatStruc *pDCTstat, u8 dct) ++{ ++ uint32_t dev; ++ uint32_t reg; ++ uint32_t dword; ++ ++ uint8_t enable_slow_access_mode = 0; ++ dev = pDCTstat->dev_dct; ++ ++ if (is_fam15h()) { ++ if (pDCTstat->_2Tmode) ++ enable_slow_access_mode = 1; ++ } else { ++ if (pDCTstat->_2Tmode == 2) ++ enable_slow_access_mode = 1; ++ } ++ ++ reg = 0x94; /* DRAM Configuration High */ ++ dword = Get_NB32_DCT(dev, dct, reg); ++ if (enable_slow_access_mode) ++ dword |= (0x1 << 20); /* Set 2T CMD mode */ ++ else ++ dword &= ~(0x1 << 20); /* Clear 2T CMD mode */ ++ Set_NB32_DCT(dev, dct, reg, dword); ++} ++ ++static void precise_ndelay_fam15(struct MCTStatStruc *pMCTstat, uint32_t nanoseconds) { ++ msr_t tsc_msr; ++ uint64_t cycle_count = (((uint64_t)pMCTstat->TSCFreq) * nanoseconds) / 1000; ++ uint64_t start_timestamp; ++ uint64_t current_timestamp; ++ ++ tsc_msr = rdmsr(0x00000010); ++ start_timestamp = (((uint64_t)tsc_msr.hi) << 32) | tsc_msr.lo; ++ do { ++ tsc_msr = rdmsr(0x00000010); ++ current_timestamp = (((uint64_t)tsc_msr.hi) << 32) | tsc_msr.lo; ++ } while ((current_timestamp - start_timestamp) < cycle_count); ++} ++ ++static void precise_memclk_delay_fam15(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, uint8_t dct, uint32_t clocks) { ++ uint16_t memclk_freq; ++ uint32_t delay_ns; ++ uint16_t fam15h_freq_tab[] = {0, 0, 0, 0, 333, 0, 400, 0, 0, 0, 533, 0, 0, 0, 667, 0, 0, 0, 800, 0, 0, 0, 933}; ++ ++ memclk_freq = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x94) & 0x1f; ++ ++ delay_ns = (((uint64_t)clocks * 1000) / fam15h_freq_tab[memclk_freq]); ++ precise_ndelay_fam15(pMCTstat, delay_ns); ++} ++ + static void mctAutoInitMCT_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstatA) + { +@@ -277,10 +1224,26 @@ static void mctAutoInitMCT_D(struct MCTStatStruc *pMCTstat, + restartinit: + mctInitMemGPIOs_A_D(); /* Set any required GPIOs*/ + if (s3resume) { ++ printk(BIOS_DEBUG, 
"mctAutoInitMCT_D: mct_ForceNBPState0_En_Fam15\n"); ++ for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) { ++ struct DCTStatStruc *pDCTstat; ++ pDCTstat = pDCTstatA + Node; ++ ++ mct_ForceNBPState0_En_Fam15(pMCTstat, pDCTstat); ++ } ++ + #if IS_ENABLED(CONFIG_HAVE_ACPI_RESUME) + printk(BIOS_DEBUG, "mctAutoInitMCT_D: Restoring DCT configuration from NVRAM\n"); + restore_mct_information_from_nvram(); + #endif ++ ++ printk(BIOS_DEBUG, "mctAutoInitMCT_D: mct_ForceNBPState0_Dis_Fam15\n"); ++ for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) { ++ struct DCTStatStruc *pDCTstat; ++ pDCTstat = pDCTstatA + Node; ++ ++ mct_ForceNBPState0_Dis_Fam15(pMCTstat, pDCTstat); ++ } + } else { + NodesWmem = 0; + node_sys_base = 0; +@@ -297,15 +1260,15 @@ restartinit: + pDCTstat->dev_map = PA_MAP(Node); + pDCTstat->dev_dct = PA_DCT(Node); + pDCTstat->dev_nbmisc = PA_NBMISC(Node); ++ pDCTstat->dev_link = PA_LINK(Node); ++ pDCTstat->dev_nbctl = PA_NBCTL(Node); + pDCTstat->NodeSysBase = node_sys_base; + + printk(BIOS_DEBUG, "mctAutoInitMCT_D: mct_init Node %d\n", Node); + mct_init(pMCTstat, pDCTstat); + mctNodeIDDebugPort_D(); + pDCTstat->NodePresent = NodePresent_D(Node); +- if (pDCTstat->NodePresent) { /* See if Node is there*/ +- printk(BIOS_DEBUG, "mctAutoInitMCT_D: clear_legacy_Mode\n"); +- clear_legacy_Mode(pMCTstat, pDCTstat); ++ if (pDCTstat->NodePresent) { + pDCTstat->LogicalCPUID = mctGetLogicalCPUID_D(Node); + + printk(BIOS_DEBUG, "mctAutoInitMCT_D: mct_InitialMCT_D\n"); +@@ -314,6 +1277,26 @@ restartinit: + printk(BIOS_DEBUG, "mctAutoInitMCT_D: mctSMBhub_Init\n"); + mctSMBhub_Init(Node); /* Switch SMBUS crossbar to proper node*/ + ++ printk(BIOS_DEBUG, "mctAutoInitMCT_D: mct_preInitDCT\n"); ++ mct_preInitDCT(pMCTstat, pDCTstat); ++ } ++ node_sys_base = pDCTstat->NodeSysBase; ++ node_sys_base += (pDCTstat->NodeSysLimit + 2) & ~0x0F; ++ } ++ ++#if IS_ENABLED(DIMM_VOLTAGE_SET_SUPPORT) ++ printk(BIOS_DEBUG, "mctAutoInitMCT_D: DIMMSetVoltage\n"); ++ DIMMSetVoltages(pMCTstat, pDCTstatA); /* Set the DIMM voltages (mainboard specific) */ ++#endif ++ ++ for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) { ++ struct DCTStatStruc *pDCTstat; ++ pDCTstat = pDCTstatA + Node; ++ ++ if (pDCTstat->NodePresent) { ++ printk(BIOS_DEBUG, "mctAutoInitMCT_D: mctSMBhub_Init\n"); ++ mctSMBhub_Init(Node); /* Switch SMBUS crossbar to proper node*/ ++ + printk(BIOS_DEBUG, "mctAutoInitMCT_D: mct_initDCT\n"); + mct_initDCT(pMCTstat, pDCTstat); + if (pDCTstat->ErrCode == SC_FatalErr) { +@@ -321,20 +1304,13 @@ restartinit: + } else if (pDCTstat->ErrCode < SC_StopError) { + NodesWmem++; + } +- } /* if Node present */ +- node_sys_base = pDCTstat->NodeSysBase; +- node_sys_base += (pDCTstat->NodeSysLimit + 2) & ~0x0F; ++ } + } + if (NodesWmem == 0) { + printk(BIOS_DEBUG, "No Nodes?!\n"); + goto fatalexit; + } + +-#if IS_ENABLED(DIMM_VOLTAGE_SET_SUPPORT) +- printk(BIOS_DEBUG, "mctAutoInitMCT_D: DIMMSetVoltage\n"); +- DIMMSetVoltages(pMCTstat, pDCTstatA); /* Set the DIMM voltages (mainboard specific) */ +-#endif +- + printk(BIOS_DEBUG, "mctAutoInitMCT_D: SyncDCTsReady_D\n"); + SyncDCTsReady_D(pMCTstat, pDCTstatA); /* Make sure DCTs are ready for accesses.*/ + +@@ -355,7 +1331,6 @@ restartinit: + printk(BIOS_DEBUG, "mctAutoInitMCT_D: :OtherTiming\n"); + mct_OtherTiming(pMCTstat, pDCTstatA); + +- + if (ReconfigureDIMMspare_D(pMCTstat, pDCTstatA)) { /* RESET# if 1st pass of DIMM spare enabled*/ + goto restartinit; + } +@@ -369,6 +1344,14 @@ restartinit: + MCTMemClr_D(pMCTstat,pDCTstatA); + } + ++ printk(BIOS_DEBUG, "mctAutoInitMCT_D: 
mct_ForceNBPState0_Dis_Fam15\n"); ++ for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) { ++ struct DCTStatStruc *pDCTstat; ++ pDCTstat = pDCTstatA + Node; ++ ++ mct_ForceNBPState0_Dis_Fam15(pMCTstat, pDCTstat); ++ } ++ + mct_FinalMCT_D(pMCTstat, pDCTstatA); + printk(BIOS_DEBUG, "mctAutoInitMCT_D Done: Global Status: %x\n", pMCTstat->GStatus); + } +@@ -408,6 +1391,425 @@ static u8 ReconfigureDIMMspare_D(struct MCTStatStruc *pMCTstat, + return ret; + } + ++/* Enable or disable phy-assisted training mode ++ * Phy-assisted training mode applies to the follow DRAM training procedures: ++ * Write Levelization Training (2.10.5.8.1) ++ * DQS Receiver Enable Training (2.10.5.8.2) ++ */ ++static void fam15EnableTrainingMode(struct MCTStatStruc *pMCTstat, ++ struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t enable) ++{ ++ uint8_t index; ++ uint32_t dword; ++ uint32_t index_reg = 0x98; ++ uint32_t dev = pDCTstat->dev_dct; ++ ++ if (enable) { ++ /* Enable training mode */ ++ dword = Get_NB32_DCT(dev, dct, 0x78); /* DRAM Control */ ++ dword &= ~(0x1 << 17); /* AddrCmdTriEn = 0 */ ++ Set_NB32_DCT(dev, dct, 0x78, dword); /* DRAM Control */ ++ ++ dword = Get_NB32_DCT(dev, dct, 0x8c); /* DRAM Timing High */ ++ dword |= (0x1 << 18); /* DisAutoRefresh = 1 */ ++ Set_NB32_DCT(dev, dct, 0x8c, dword); /* DRAM Timing High */ ++ ++ dword = Get_NB32_DCT(dev, dct, 0x94); /* DRAM Configuration High */ ++ dword &= ~(0xf << 24); /* DcqBypassMax = 0 */ ++ dword &= ~(0x1 << 22); /* BankSwizzleMode = 0 */ ++ dword &= ~(0x1 << 15); /* PowerDownEn = 0 */ ++ dword &= ~(0x3 << 10); /* ZqcsInterval = 0 */ ++ Set_NB32_DCT(dev, dct, 0x94, dword); /* DRAM Configuration High */ ++ ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000d); ++ dword &= ~(0xf << 16); /* RxMaxDurDllNoLock = 0 */ ++ dword &= ~(0xf); /* TxMaxDurDllNoLock = 0 */ ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000d, dword); ++ ++ for (index = 0; index < 0x9; index++) { ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0010 | (index << 8)); ++ dword &= ~(0x1 << 12); /* EnRxPadStandby = 0 */ ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0010 | (index << 8), dword); ++ } ++ ++ dword = Get_NB32_DCT(dev, dct, 0xa4); /* DRAM Controller Temperature Throttle */ ++ dword &= ~(0x1 << 11); /* BwCapEn = 0 */ ++ dword &= ~(0x1 << 8); /* ODTSEn = 0 */ ++ Set_NB32_DCT(dev, dct, 0xa4, dword); /* DRAM Controller Temperature Throttle */ ++ ++ dword = Get_NB32_DCT(dev, dct, 0x110); /* DRAM Controller Select Low */ ++ dword &= ~(0x1 << 2); /* DctSelIntLvEn = 0 */ ++ Set_NB32_DCT(dev, dct, 0x110, dword); /* DRAM Controller Select Low */ ++ ++ dword = Get_NB32_DCT(pDCTstat->dev_nbmisc, dct, 0x58); /* Scrub Rate Control */ ++ dword &= ~(0x1f << 24); /* L3Scrub = 0 */ ++ dword &= ~(0x1f); /* DramScrub = 0 */ ++ Set_NB32_DCT(pDCTstat->dev_nbmisc, dct, 0x58, dword); /* Scrub Rate Control */ ++ ++ dword = Get_NB32_DCT(pDCTstat->dev_nbmisc, dct, 0x5c); /* DRAM Scrub Address Low */ ++ dword &= ~(0x1); /* ScrubReDirEn = 0 */ ++ Set_NB32_DCT(pDCTstat->dev_nbmisc, dct, 0x5c, dword); /* DRAM Scrub Address Low */ ++ ++ dword = Get_NB32_DCT(pDCTstat->dev_nbmisc, dct, 0x1b8); /* L3 Control 1 */ ++ dword |= (0x1 << 4); /* L3ScrbRedirDis = 1 */ ++ Set_NB32_DCT(pDCTstat->dev_nbmisc, dct, 0x1b8, dword); /* L3 Control 1 */ ++ ++ /* Fam15h BKDG section 2.10.5.5.1 */ ++ dword = Get_NB32_DCT(dev, dct, 0x218); /* DRAM Timing 5 */ ++ dword &= ~(0xf << 24); /* TrdrdSdSc = 0xb */ ++ dword |= (0xb << 24); ++ dword &= ~(0xf << 16); /* TrdrdSdDc = 0xb */ ++ dword |= 
(0xb << 16); ++ dword &= ~(0xf); /* TrdrdDd = 0xb */ ++ dword |= 0xb; ++ Set_NB32_DCT(dev, dct, 0x218, dword); /* DRAM Timing 5 */ ++ ++ /* Fam15h BKDG section 2.10.5.5.2 */ ++ dword = Get_NB32_DCT(dev, dct, 0x214); /* DRAM Timing 4 */ ++ dword &= ~(0xf << 16); /* TwrwrSdSc = 0xb */ ++ dword |= (0xb << 16); ++ dword &= ~(0xf << 8); /* TwrwrSdDc = 0xb */ ++ dword |= (0xb << 8); ++ dword &= ~(0xf); /* TwrwrDd = 0xb */ ++ dword |= 0xb; ++ Set_NB32_DCT(dev, dct, 0x214, dword); /* DRAM Timing 4 */ ++ ++ /* Fam15h BKDG section 2.10.5.5.3 */ ++ dword = Get_NB32_DCT(dev, dct, 0x218); /* DRAM Timing 5 */ ++ dword &= ~(0xf << 8); /* Twrrd = 0xb */ ++ dword |= (0xb << 8); ++ Set_NB32_DCT(dev, dct, 0x218, dword); /* DRAM Timing 5 */ ++ ++ /* Fam15h BKDG section 2.10.5.5.4 */ ++ dword = Get_NB32_DCT(dev, dct, 0x21c); /* DRAM Timing 6 */ ++ dword &= ~(0x1f << 8); /* TrwtTO = 0x16 */ ++ dword |= (0x16 << 8); ++ dword &= ~(0x1f << 16); /* TrwtWB = TrwtTO + 1 */ ++ dword |= ((((dword >> 8) & 0x1f) + 1) << 16); ++ Set_NB32_DCT(dev, dct, 0x21c, dword); /* DRAM Timing 6 */ ++ } else { ++ /* Disable training mode */ ++ uint8_t lane; ++ uint8_t dimm; ++ uint8_t receiver; ++ uint8_t max_lane; ++ uint8_t ecc_enabled; ++ uint8_t x4_present = 0; ++ uint8_t x8_present = 0; ++ uint8_t memclk_index; ++ uint8_t interleave_channels = 0; ++ uint8_t redirect_ecc_scrub = 0; ++ uint16_t trdrdsddc; ++ uint16_t trdrddd; ++ uint16_t cdd_trdrddd; ++ uint16_t twrwrsddc; ++ uint16_t twrwrdd; ++ uint16_t cdd_twrwrdd; ++ uint16_t twrrd; ++ uint16_t trwtto; ++ uint8_t first_dimm; ++ uint16_t delay; ++ uint16_t delay2; ++ uint8_t read_odt_delay; ++ uint8_t write_odt_delay; ++ uint16_t difference; ++ uint16_t current_total_delay_1[MAX_BYTE_LANES]; ++ uint16_t current_total_delay_2[MAX_BYTE_LANES]; ++ ++ /* FIXME ++ * This should be platform configurable ++ */ ++ uint8_t dimm_event_l_pin_support = 0; ++ ++ ecc_enabled = !!(pMCTstat->GStatus & 1 << GSB_ECCDIMMs); ++ if (ecc_enabled) ++ max_lane = 9; ++ else ++ max_lane = 8; ++ ++ if (pDCTstat->Dimmx4Present & ((dct)?0xaa:0x55)) ++ x4_present = 1; ++ if (pDCTstat->Dimmx8Present & ((dct)?0xaa:0x55)) ++ x8_present = 1; ++ memclk_index = Get_NB32_DCT(dev, dct, 0x94) & 0x1f; ++ ++ if (pDCTstat->DIMMValidDCT[0] && pDCTstat->DIMMValidDCT[1] && mctGet_NVbits(NV_Unganged)) ++ interleave_channels = 1; ++ ++ if ((pMCTstat->GStatus & 1 << GSB_ECCDIMMs) && mctGet_NVbits(NV_ECCRedir)) ++ redirect_ecc_scrub = 1; ++ ++ dword = (Get_NB32_DCT(dev, dct, 0x240) >> 4) & 0xf; ++ if (dword > 6) ++ read_odt_delay = dword - 6; ++ else ++ read_odt_delay = 0; ++ ++ dword = Get_NB32_DCT(dev, dct, 0x240); ++ delay = (dword >> 4) & 0xf; ++ if (delay > 6) ++ read_odt_delay = delay - 6; ++ else ++ read_odt_delay = 0; ++ delay = (dword >> 12) & 0x7; ++ if (delay > 6) ++ write_odt_delay = delay - 6; ++ else ++ write_odt_delay = 0; ++ ++ /* TODO: ++ * Adjust trdrdsddc if four-rank DIMMs are installed per ++ * section 2.10.5.5.1 of the Family 15h BKDG. ++ * cdd_trdrdsddc will also need to be calculated in that process. 
++ */ ++ trdrdsddc = 3; ++ ++ /* Calculate the Critical Delay Difference for TrdrdDd */ ++ cdd_trdrddd = 0; ++ first_dimm = 1; ++ for (receiver = 0; receiver < 8; receiver += 2) { ++ dimm = (receiver >> 1); ++ ++ if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, dct, receiver)) ++ continue; ++ ++ read_dqs_receiver_enable_control_registers(current_total_delay_2, dev, dct, dimm, index_reg); ++ ++ if (first_dimm) { ++ memcpy(current_total_delay_1, current_total_delay_2, sizeof(current_total_delay_1)); ++ first_dimm = 0; ++ } ++ ++ for (lane = 0; lane < max_lane; lane++) { ++ if (current_total_delay_1[lane] > current_total_delay_2[lane]) ++ difference = current_total_delay_1[lane] - current_total_delay_2[lane]; ++ else ++ difference = current_total_delay_2[lane] - current_total_delay_1[lane]; ++ ++ if (difference > cdd_trdrddd) ++ cdd_trdrddd = difference; ++ } ++ } ++ ++ /* Convert the difference to MEMCLKs */ ++ cdd_trdrddd = (((cdd_trdrddd >> 5) & 0x1f) + 1) / 2; ++ ++ /* Calculate Trdrddd */ ++ delay = (read_odt_delay + 3) * 2; ++ delay2 = cdd_trdrddd + 7; ++ if (delay2 > delay) ++ delay = delay2; ++ trdrddd = (delay + 1) / 2; /* + 1 is equivalent to ceiling function here */ ++ if (trdrdsddc > trdrddd) ++ trdrddd = trdrdsddc; ++ ++ /* TODO: ++ * Adjust twrwrsddc if four-rank DIMMs are installed per ++ * section 2.10.5.5.1 of the Family 15h BKDG. ++ * cdd_twrwrsddc will also need to be calculated in that process. ++ */ ++ twrwrsddc = 4; ++ ++ /* Calculate the Critical Delay Difference for TwrwrDd */ ++ cdd_twrwrdd = 0; ++ first_dimm = 1; ++ for (receiver = 0; receiver < 8; receiver += 2) { ++ dimm = (receiver >> 1); ++ ++ if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, dct, receiver)) ++ continue; ++ ++ read_dqs_write_timing_control_registers(current_total_delay_2, dev, dct, dimm, index_reg); ++ ++ if (first_dimm) { ++ memcpy(current_total_delay_1, current_total_delay_2, sizeof(current_total_delay_1)); ++ first_dimm = 0; ++ } ++ ++ for (lane = 0; lane < max_lane; lane++) { ++ if (current_total_delay_1[lane] > current_total_delay_2[lane]) ++ difference = current_total_delay_1[lane] - current_total_delay_2[lane]; ++ else ++ difference = current_total_delay_2[lane] - current_total_delay_1[lane]; ++ ++ if (difference > cdd_twrwrdd) ++ cdd_twrwrdd = difference; ++ } ++ } ++ ++ /* Convert the difference to MEMCLKs */ ++ cdd_twrwrdd = (((cdd_twrwrdd >> 5) & 0x1f) + 1) / 2; ++ ++ /* Calculate Twrwrdd */ ++ delay = (write_odt_delay + 3) * 2; ++ delay2 = cdd_twrwrdd + 7; ++ if (delay2 > delay) ++ delay = delay2; ++ twrwrdd = (delay + 1) / 2; /* + 1 is equivalent to ceiling function here */ ++ if (twrwrsddc > twrwrdd) ++ twrwrdd = twrwrsddc; ++ ++ dword = Get_NB32_DCT(dev, dct, 0x78); /* DRAM Control */ ++ dword |= (0x1 << 17); /* AddrCmdTriEn = 1 */ ++ Set_NB32_DCT(dev, dct, 0x78, dword); /* DRAM Control */ ++ ++ dword = Get_NB32_DCT(dev, dct, 0x8c); /* DRAM Timing High */ ++ dword &= ~(0x1 << 18); /* DisAutoRefresh = 0 */ ++ Set_NB32_DCT(dev, dct, 0x8c, dword); /* DRAM Timing High */ ++ ++ dword = Get_NB32_DCT(dev, dct, 0x94); /* DRAM Configuration High */ ++ dword |= (0xf << 24); /* DcqBypassMax = 0xf */ ++ dword |= (0x1 << 22); /* BankSwizzleMode = 1 */ ++ dword |= (0x1 << 15); /* PowerDownEn = 1 */ ++ dword &= ~(0x3 << 10); /* ZqcsInterval = 0x2 */ ++ dword |= (0x2 << 10); ++ Set_NB32_DCT(dev, dct, 0x94, dword); /* DRAM Configuration High */ ++ ++ if (x4_present && x8_present) { ++ /* Mixed channel of 4x and 8x DIMMs */ ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000d); ++ dword &= 
~(0x3 << 24); /* RxDLLWakeupTime = 0 */ ++ dword &= ~(0x7 << 20); /* RxCPUpdPeriod = 0 */ ++ dword &= ~(0xf << 16); /* RxMaxDurDllNoLock = 0 */ ++ dword &= ~(0x3 << 8); /* TxDLLWakeupTime = 0 */ ++ dword &= ~(0x7 << 4); /* TxCPUpdPeriod = 0 */ ++ dword &= ~(0xf); /* TxMaxDurDllNoLock = 0 */ ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000d, dword); ++ } else { ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000d); ++ dword &= ~(0x3 << 24); /* RxDLLWakeupTime = 3 */ ++ dword |= (0x3 << 24); ++ dword &= ~(0x7 << 20); /* RxCPUpdPeriod = 3 */ ++ dword |= (0x3 << 20); ++ dword &= ~(0xf << 16); /* RxMaxDurDllNoLock = 7 */ ++ dword |= (0x7 << 16); ++ dword &= ~(0x3 << 8); /* TxDLLWakeupTime = 3 */ ++ dword |= (0x3 << 8); ++ dword &= ~(0x7 << 4); /* TxCPUpdPeriod = 3 */ ++ dword |= (0x3 << 4); ++ dword &= ~(0xf); /* TxMaxDurDllNoLock = 7 */ ++ dword |= 0x7; ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000d, dword); ++ } ++ ++ if ((memclk_index <= 0x12) && (x4_present != x8_present)) { ++ /* MemClkFreq <= 800MHz ++ * Not a mixed channel of x4 and x8 DIMMs ++ */ ++ for (index = 0; index < 0x9; index++) { ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0010 | (index << 8)); ++ dword |= (0x1 << 12); /* EnRxPadStandby = 1 */ ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0010 | (index << 8), dword); ++ } ++ } else { ++ for (index = 0; index < 0x9; index++) { ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0010 | (index << 8)); ++ dword &= ~(0x1 << 12); /* EnRxPadStandby = 0 */ ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0010 | (index << 8), dword); ++ } ++ } ++ ++ /* TODO ++ * Calculate Twrrd per section 2.10.5.5.3 of the Family 15h BKDG ++ */ ++ twrrd = 0xb; ++ ++ /* TODO ++ * Calculate TrwtTO per section 2.10.5.5.4 of the Family 15h BKDG ++ */ ++ trwtto = 0x16; ++ ++ dword = Get_NB32_DCT(dev, dct, 0xa4); /* DRAM Controller Temperature Throttle */ ++ dword &= ~(0x1 << 11); /* BwCapEn = 0 */ ++ dword &= ~(0x1 << 8); /* ODTSEn = dimm_event_l_pin_support */ ++ dword |= (dimm_event_l_pin_support & 0x1) << 8; ++ Set_NB32_DCT(dev, dct, 0xa4, dword); /* DRAM Controller Temperature Throttle */ ++ ++ dword = Get_NB32_DCT(dev, dct, 0x110); /* DRAM Controller Select Low */ ++ dword &= ~(0x1 << 2); /* DctSelIntLvEn = interleave_channels */ ++ dword |= (interleave_channels & 0x1) << 2; ++ Set_NB32_DCT(dev, dct, 0x110, dword); /* DRAM Controller Select Low */ ++ ++ dword = Get_NB32_DCT(pDCTstat->dev_nbmisc, dct, 0x58); /* Scrub Rate Control */ ++ dword &= ~(0x1f << 24); /* L3Scrub = NV_L3BKScrub */ ++ dword |= (mctGet_NVbits(NV_L3BKScrub) & 0x1f) << 24; ++ dword &= ~(0x1f); /* DramScrub = NV_DramBKScrub */ ++ dword |= mctGet_NVbits(NV_DramBKScrub) & 0x1f; ++ Set_NB32_DCT(pDCTstat->dev_nbmisc, dct, 0x58, dword); /* Scrub Rate Control */ ++ ++ dword = Get_NB32_DCT(pDCTstat->dev_nbmisc, dct, 0x5c); /* DRAM Scrub Address Low */ ++ dword &= ~(0x1); /* ScrubReDirEn = redirect_ecc_scrub */ ++ dword |= redirect_ecc_scrub & 0x1; ++ Set_NB32_DCT(pDCTstat->dev_nbmisc, dct, 0x5c, dword); /* DRAM Scrub Address Low */ ++ ++ dword = Get_NB32_DCT(pDCTstat->dev_nbmisc, dct, 0x1b8); /* L3 Control 1 */ ++ dword &= ~(0x1 << 4); /* L3ScrbRedirDis = 0 */ ++ Set_NB32_DCT(pDCTstat->dev_nbmisc, dct, 0x1b8, dword); /* L3 Control 1 */ ++ ++ /* FIXME ++ * The BKDG-recommended settings cause memory corruption on the ASUS KGPE-D16. ++ * Investigate and fix... 
++ */ ++#if 0 ++ /* Fam15h BKDG section 2.10.5.5.1 */ ++ dword = Get_NB32_DCT(dev, dct, 0x218); /* DRAM Timing 5 */ ++ dword &= ~(0xf << 24); /* TrdrdSdSc = 0x1 */ ++ dword |= (0x1 << 24); ++ dword &= ~(0xf << 16); /* TrdrdSdDc = trdrdsddc */ ++ dword |= ((trdrdsddc & 0xf) << 16); ++ dword &= ~(0xf); /* TrdrdDd = trdrddd */ ++ dword |= (trdrddd & 0xf); ++ Set_NB32_DCT(dev, dct, 0x218, dword); /* DRAM Timing 5 */ ++#endif ++ ++ /* Fam15h BKDG section 2.10.5.5.2 */ ++ dword = Get_NB32_DCT(dev, dct, 0x214); /* DRAM Timing 4 */ ++ dword &= ~(0xf << 16); /* TwrwrSdSc = 0x1 */ ++ dword |= (0x1 << 16); ++ dword &= ~(0xf << 8); /* TwrwrSdDc = twrwrsddc */ ++ dword |= ((twrwrsddc & 0xf) << 8); ++ dword &= ~(0xf); /* TwrwrDd = twrwrdd */ ++ dword |= (twrwrdd & 0xf); ++ Set_NB32_DCT(dev, dct, 0x214, dword); /* DRAM Timing 4 */ ++ ++ /* Fam15h BKDG section 2.10.5.5.3 */ ++ dword = Get_NB32_DCT(dev, dct, 0x218); /* DRAM Timing 5 */ ++ dword &= ~(0xf << 8); /* Twrrd = twrrd */ ++ dword |= ((twrrd & 0xf) << 8); ++ Set_NB32_DCT(dev, dct, 0x218, dword); /* DRAM Timing 5 */ ++ ++ /* Fam15h BKDG section 2.10.5.5.4 */ ++ dword = Get_NB32_DCT(dev, dct, 0x21c); /* DRAM Timing 6 */ ++ dword &= ~(0x1f << 8); /* TrwtTO = trwtto */ ++ dword |= ((trwtto & 0x1f) << 8); ++ dword &= ~(0x1f << 16); /* TrwtWB = TrwtTO + 1 */ ++ dword |= ((((dword >> 8) & 0x1f) + 1) << 16); ++ Set_NB32_DCT(dev, dct, 0x21c, dword); /* DRAM Timing 6 */ ++ ++ /* Enable prefetchers */ ++ dword = Get_NB32_DCT(dev, dct, 0x110); /* Memory Controller Configuration High */ ++ dword &= ~(0x1 << 13); /* PrefIoDis = 0 */ ++ dword &= ~(0x1 << 12); /* PrefCpuDis = 0 */ ++ Set_NB32_DCT(dev, dct, 0x110, dword); /* Memory Controller Configuration High */ ++ } ++} ++ ++static void exit_training_mode_fam15(struct MCTStatStruc *pMCTstat, ++ struct DCTStatStruc *pDCTstatA) ++{ ++ uint8_t node; ++ uint8_t dct; ++ ++ for (node = 0; node < MAX_NODES_SUPPORTED; node++) { ++ struct DCTStatStruc *pDCTstat; ++ pDCTstat = pDCTstatA + node; ++ ++ if (pDCTstat->NodePresent) ++ for (dct = 0; dct < 2; dct++) ++ fam15EnableTrainingMode(pMCTstat, pDCTstat, dct, 0); ++ } ++} ++ + static void DQSTiming_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstatA) + { +@@ -424,6 +1826,20 @@ static void DQSTiming_D(struct MCTStatStruc *pMCTstat, + mct_BeforeDQSTrain_D(pMCTstat, pDCTstatA); + phyAssistedMemFnceTraining(pMCTstat, pDCTstatA); + ++ if (is_fam15h()) { ++ uint8_t Node; ++ struct DCTStatStruc *pDCTstat; ++ for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) { ++ pDCTstat = pDCTstatA + Node; ++ if (pDCTstat->NodePresent) { ++ if (pDCTstat->DIMMValidDCT[0]) ++ InitPhyCompensation(pMCTstat, pDCTstat, 0); ++ if (pDCTstat->DIMMValidDCT[1]) ++ InitPhyCompensation(pMCTstat, pDCTstat, 1); ++ } ++ } ++ } ++ + if (nv_DQSTrainCTL) { + mctHookBeforeAnyTraining(pMCTstat, pDCTstatA); + /* TODO: should be in mctHookBeforeAnyTraining */ +@@ -431,16 +1847,35 @@ static void DQSTiming_D(struct MCTStatStruc *pMCTstat, + _WRMSR(0x26D, 0x04040404, 0x04040404); + _WRMSR(0x26E, 0x04040404, 0x04040404); + _WRMSR(0x26F, 0x04040404, 0x04040404); +- mct_WriteLevelization_HW(pMCTstat, pDCTstatA); ++ mct_WriteLevelization_HW(pMCTstat, pDCTstatA, FirstPass); + +- TrainReceiverEn_D(pMCTstat, pDCTstatA, FirstPass); ++ if (is_fam15h()) { ++ /* Receiver Enable Training Pass 1 */ ++ TrainReceiverEn_D(pMCTstat, pDCTstatA, FirstPass); ++ } + +- mct_TrainDQSPos_D(pMCTstat, pDCTstatA); ++ mct_WriteLevelization_HW(pMCTstat, pDCTstatA, SecondPass); ++ ++ if (is_fam15h()) { ++ /* Receiver Enable Training 
Pass 2 */ ++ // TrainReceiverEn_D(pMCTstat, pDCTstatA, SecondPass); ++ ++ /* TODO: ++ * Determine why running TrainReceiverEn_D in SecondPass ++ * mode yields less stable training values than when run ++ * in FirstPass mode as in the HACK below. ++ */ ++ TrainReceiverEn_D(pMCTstat, pDCTstatA, FirstPass); ++ } else { ++ TrainReceiverEn_D(pMCTstat, pDCTstatA, FirstPass); ++ } + +- /* Second Pass never used for Barcelona! */ +- /* TrainReceiverEn_D(pMCTstat, pDCTstatA, SecondPass); */ ++ mct_TrainDQSPos_D(pMCTstat, pDCTstatA); + +- mctSetEccDQSRcvrEn_D(pMCTstat, pDCTstatA); ++ if (is_fam15h()) ++ exit_training_mode_fam15(pMCTstat, pDCTstatA); ++ else ++ mctSetEccDQSRcvrEn_D(pMCTstat, pDCTstatA); + + /* FIXME - currently uses calculated value TrainMaxReadLatency_D(pMCTstat, pDCTstatA); */ + mctHookAfterAnyTraining(); +@@ -476,7 +1911,7 @@ static void LoadDQSSigTmgRegs_D(struct MCTStatStruc *pMCTstat, + for (Channel = 0;Channel < 2; Channel++) { + /* there are four receiver pairs, + loosely associated with chipselects.*/ +- index_reg = 0x98 + Channel * 0x100; ++ index_reg = 0x98; + for (Receiver = 0; Receiver < 8; Receiver += 2) { + /* Set Receiver Enable Values */ + mct_SetRcvrEnDly_D(pDCTstat, +@@ -492,7 +1927,7 @@ static void LoadDQSSigTmgRegs_D(struct MCTStatStruc *pMCTstat, + txdqs = pDCTstat->CH_D_B_TxDqs[Channel][Receiver >> 1][ByteLane]; + index = Table_DQSRcvEn_Offset[ByteLane >> 1]; + index += (Receiver >> 1) * 3 + 0x10 + 0x20; /* Addl_Index */ +- val = Get_NB32_index_wait(dev, 0x98 + 0x100*Channel, index); ++ val = Get_NB32_index_wait_DCT(dev, Channel, 0x98, index); + if (ByteLane & 1) { /* odd byte lane */ + val &= ~(0xFF << 16); + val |= txdqs << 16; +@@ -500,7 +1935,7 @@ static void LoadDQSSigTmgRegs_D(struct MCTStatStruc *pMCTstat, + val &= ~0xFF; + val |= txdqs; + } +- Set_NB32_index_wait(dev, 0x98 + 0x100*Channel, index, val); ++ Set_NB32_index_wait_DCT(dev, Channel, 0x98, index, val); + } + } + } +@@ -510,7 +1945,7 @@ static void LoadDQSSigTmgRegs_D(struct MCTStatStruc *pMCTstat, + + for (Channel = 0; Channel < 2; Channel++) { + u8 *p; +- index_reg = 0x98 + Channel * 0x100; ++ index_reg = 0x98; + + /* NOTE: + * when 400, 533, 667, it will support dimm0/1/2/3, +@@ -525,7 +1960,7 @@ static void LoadDQSSigTmgRegs_D(struct MCTStatStruc *pMCTstat, + if (DIMM == 0) { + index = 0; /* CHA Write Data Timing Low */ + } else { +- if (pDCTstat->Speed >= 4) { ++ if (pDCTstat->Speed >= mhz_to_memclk_config(mctGet_NVbits(NV_MIN_MEMCLK))) { + index = 0x100 * DIMM; + } else { + break; +@@ -534,23 +1969,23 @@ static void LoadDQSSigTmgRegs_D(struct MCTStatStruc *pMCTstat, + for (Dir = 0; Dir < 2; Dir++) {/* RD/WR */ + p = pDCTstat->CH_D_DIR_B_DQS[Channel][DIMM][Dir]; + val = stream_to_int(p); /* CHA Read Data Timing High */ +- Set_NB32_index_wait(dev, index_reg, index+1, val); ++ Set_NB32_index_wait_DCT(dev, Channel, index_reg, index+1, val); + val = stream_to_int(p+4); /* CHA Write Data Timing High */ +- Set_NB32_index_wait(dev, index_reg, index+2, val); ++ Set_NB32_index_wait_DCT(dev, Channel, index_reg, index+2, val); + val = *(p+8); /* CHA Write ECC Timing */ +- Set_NB32_index_wait(dev, index_reg, index+3, val); ++ Set_NB32_index_wait_DCT(dev, Channel, index_reg, index+3, val); + index += 4; + } + } + } + + for (Channel = 0; Channel<2; Channel++) { +- reg = 0x78 + Channel * 0x100; +- val = Get_NB32(dev, reg); ++ reg = 0x78; ++ val = Get_NB32_DCT(dev, Channel, reg); + val &= ~(0x3ff<<22); + val |= ((u32) pDCTstat->CH_MaxRdLat[Channel] << 22); + val &= ~(1<<DqsRcvEnTrain); +- Set_NB32(dev, 
reg, val); /* program MaxRdLatency to correspond with current delay*/ ++ Set_NB32_DCT(dev, Channel, reg, val); /* program MaxRdLatency to correspond with current delay*/ + } + } + } +@@ -812,49 +2247,70 @@ finish: + return ret; + } + +-static void DCTInit_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct) ++static void DCTPreInit_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct) + { + /* +- * Initialize DRAM on single Athlon 64/Opteron Node. ++ * Run DCT pre-initialization tasks + */ +- u8 stopDCTflag; +- u32 val; ++ uint32_t dword; + ++ /* Reset DCT registers */ + ClearDCT_D(pMCTstat, pDCTstat, dct); +- stopDCTflag = 1; /*preload flag with 'disable' */ +- /* enable DDR3 support */ +- val = Get_NB32(pDCTstat->dev_dct, 0x94 + dct * 0x100); +- val |= 1 << Ddr3Mode; +- Set_NB32(pDCTstat->dev_dct, 0x94 + dct * 0x100, val); ++ pDCTstat->stopDCT = 1; /*preload flag with 'disable' */ ++ ++ if (!is_fam15h()) { ++ /* Enable DDR3 support */ ++ dword = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x94); ++ dword |= 1 << Ddr3Mode; ++ Set_NB32_DCT(pDCTstat->dev_dct, dct, 0x94, dword); ++ } ++ ++ /* Read the SPD information into the data structures */ + if (mct_DIMMPresence(pMCTstat, pDCTstat, dct) < SC_StopError) { + printk(BIOS_DEBUG, "\t\tDCTInit_D: mct_DIMMPresence Done\n"); +- if (mct_SPDCalcWidth(pMCTstat, pDCTstat, dct) < SC_StopError) { +- printk(BIOS_DEBUG, "\t\tDCTInit_D: mct_SPDCalcWidth Done\n"); +- if (AutoCycTiming_D(pMCTstat, pDCTstat, dct) < SC_StopError) { +- printk(BIOS_DEBUG, "\t\tDCTInit_D: AutoCycTiming_D Done\n"); +- if (AutoConfig_D(pMCTstat, pDCTstat, dct) < SC_StopError) { +- printk(BIOS_DEBUG, "\t\tDCTInit_D: AutoConfig_D Done\n"); +- if (PlatformSpec_D(pMCTstat, pDCTstat, dct) < SC_StopError) { +- printk(BIOS_DEBUG, "\t\tDCTInit_D: PlatformSpec_D Done\n"); +- stopDCTflag = 0; +- if (!(pMCTstat->GStatus & (1 << GSB_EnDIMMSpareNW))) { +- printk(BIOS_DEBUG, "\t\tDCTInit_D: StartupDCT_D\n"); +- StartupDCT_D(pMCTstat, pDCTstat, dct); /*yeaahhh! */ +- } ++ } ++} ++ ++static void DCTInit_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct) ++{ ++ /* ++ * Initialize DRAM on single Athlon 64/Opteron Node. ++ */ ++ uint32_t dword; ++ ++ if (!is_fam15h()) { ++ /* (Re)-enable DDR3 support */ ++ dword = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x94); ++ dword |= 1 << Ddr3Mode; ++ Set_NB32_DCT(pDCTstat->dev_dct, dct, 0x94, dword); ++ } ++ ++ if (mct_SPDCalcWidth(pMCTstat, pDCTstat, dct) < SC_StopError) { ++ printk(BIOS_DEBUG, "\t\tDCTInit_D: mct_SPDCalcWidth Done\n"); ++ if (AutoCycTiming_D(pMCTstat, pDCTstat, dct) < SC_StopError) { ++ printk(BIOS_DEBUG, "\t\tDCTInit_D: AutoCycTiming_D Done\n"); ++ if (AutoConfig_D(pMCTstat, pDCTstat, dct) < SC_StopError) { ++ printk(BIOS_DEBUG, "\t\tDCTInit_D: AutoConfig_D Done\n"); ++ if (PlatformSpec_D(pMCTstat, pDCTstat, dct) < SC_StopError) { ++ printk(BIOS_DEBUG, "\t\tDCTInit_D: PlatformSpec_D Done\n"); ++ pDCTstat->stopDCT = 0; ++ if (!(pMCTstat->GStatus & (1 << GSB_EnDIMMSpareNW))) { ++ printk(BIOS_DEBUG, "\t\tDCTInit_D: StartupDCT_D\n"); ++ StartupDCT_D(pMCTstat, pDCTstat, dct); /*yeaahhh! 
*/ + } + } + } + } + } + +- if (stopDCTflag) { +- u32 reg_off = dct * 0x100; +- val = 1<<DisDramInterface; +- Set_NB32(pDCTstat->dev_dct, reg_off+0x94, val); +- /*To maximize power savings when DisDramInterface=1b, +- all of the MemClkDis bits should also be set.*/ +- val = 0xFF000000; +- Set_NB32(pDCTstat->dev_dct, reg_off+0x88, val); ++ if (pDCTstat->stopDCT) { ++ dword = 1 << DisDramInterface; ++ Set_NB32_DCT(pDCTstat->dev_dct, dct, 0x94, dword); ++ ++ /* To maximize power savings when DisDramInterface=1b, ++ * all of the MemClkDis bits should also be set. ++ */ ++ Set_NB32_DCT(pDCTstat->dev_dct, dct, 0x88, 0xff000000); + } else { + mct_EnDllShutdownSR(pMCTstat, pDCTstat, dct); + } +@@ -876,20 +2332,24 @@ static void SyncDCTsReady_D(struct MCTStatStruc *pMCTstat, + pDCTstat = pDCTstatA + Node; + mct_SyncDCTsReady(pDCTstat); + } +- /* v6.1.3 */ +- /* re-enable phy compensation engine when dram init is completed on all nodes. */ +- for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) { +- struct DCTStatStruc *pDCTstat; +- pDCTstat = pDCTstatA + Node; +- if (pDCTstat->NodePresent) { +- if (pDCTstat->DIMMValidDCT[0] > 0 || pDCTstat->DIMMValidDCT[1] > 0) { +- /* re-enable phy compensation engine when dram init on both DCTs is completed. */ +- val = Get_NB32_index_wait(pDCTstat->dev_dct, 0x98, 0x8); +- val &= ~(1 << DisAutoComp); +- Set_NB32_index_wait(pDCTstat->dev_dct, 0x98, 0x8, val); ++ ++ if (!is_fam15h()) { ++ /* v6.1.3 */ ++ /* re-enable phy compensation engine when dram init is completed on all nodes. */ ++ for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) { ++ struct DCTStatStruc *pDCTstat; ++ pDCTstat = pDCTstatA + Node; ++ if (pDCTstat->NodePresent) { ++ if (pDCTstat->DIMMValidDCT[0] > 0 || pDCTstat->DIMMValidDCT[1] > 0) { ++ /* re-enable phy compensation engine when dram init on both DCTs is completed. */ ++ val = Get_NB32_index_wait(pDCTstat->dev_dct, 0x98, 0x8); ++ val &= ~(1 << DisAutoComp); ++ Set_NB32_index_wait(pDCTstat->dev_dct, 0x98, 0x8, val); ++ } + } + } + } ++ + /* wait 750us before any memory access can be made. 
*/ + mct_Wait(15000); + } +@@ -911,10 +2371,9 @@ static void StartupDCT_D(struct MCTStatStruc *pMCTstat, + */ + u32 val; + u32 dev; +- u32 reg_off = dct * 0x100; + + dev = pDCTstat->dev_dct; +- val = Get_NB32(dev, 0x94 + reg_off); ++ val = Get_NB32_DCT(dev, dct, 0x94); + if (val & (1<<MemClkFreqVal)) { + mctHookBeforeDramInit(); /* generalized Hook */ + if (!(pMCTstat->GStatus & (1 << GSB_EnDIMMSpareNW))) +@@ -929,23 +2388,23 @@ static void ClearDCT_D(struct MCTStatStruc *pMCTstat, + { + u32 reg_end; + u32 dev = pDCTstat->dev_dct; +- u32 reg = 0x40 + 0x100 * dct; ++ u32 reg = 0x40; + u32 val = 0; + + if (pMCTstat->GStatus & (1 << GSB_EnDIMMSpareNW)) { +- reg_end = 0x78 + 0x100 * dct; ++ reg_end = 0x78; + } else { +- reg_end = 0xA4 + 0x100 * dct; ++ reg_end = 0xA4; + } + + while(reg < reg_end) { + if ((reg & 0xFF) == 0x90) { + if (pDCTstat->LogicalCPUID & AMD_DR_Dx) { +- val = Get_NB32(dev, reg); /* get DRAMConfigLow */ ++ val = Get_NB32_DCT(dev, dct, reg); /* get DRAMConfigLow */ + val |= 0x08000000; /* preserve value of DisDllShutdownSR for only Rev.D */ + } + } +- Set_NB32(dev, reg, val); ++ Set_NB32_DCT(dev, dct, reg, val); + val = 0; + reg += 4; + } +@@ -964,6 +2423,7 @@ static void SPD2ndTiming(struct MCTStatStruc *pMCTstat, + u16 Trp, Trrd, Trcd, Tras, Trc; + u8 Trfc[4]; + u16 Tfaw; ++ u16 Tcwl; /* Fam15h only */ + u32 DramTimingLo, DramTimingHi; + u8 tCK16x; + u16 Twtr; +@@ -972,10 +2432,11 @@ static void SPD2ndTiming(struct MCTStatStruc *pMCTstat, + u8 byte; + u32 dword; + u32 dev; +- u32 reg_off; + u32 val; + u16 smbaddr; + ++ printk(BIOS_DEBUG, "%s: Start\n", __func__); ++ + /* Gather all DIMM mini-max values for cycle timing data */ + Trp = 0; + Trrd = 0; +@@ -1188,88 +2649,164 @@ static void SPD2ndTiming(struct MCTStatStruc *pMCTstat, + + mctAdjustAutoCycTmg_D(); + ++ if (is_fam15h()) { ++ /* Compute Tcwl (Fam15h BKDG v3.14 Table 203) */ ++ if (pDCTstat->Speed <= 0x6) ++ Tcwl = 0x5; ++ else if (pDCTstat->Speed == 0xa) ++ Tcwl = 0x6; ++ else if (pDCTstat->Speed == 0xe) ++ Tcwl = 0x7; ++ else if (pDCTstat->Speed == 0x12) ++ Tcwl = 0x8; ++ else if (pDCTstat->Speed == 0x16) ++ Tcwl = 0x9; ++ else ++ Tcwl = 0x5; /* Power-on default */ ++ } ++ + /* Program DRAM Timing values */ +- DramTimingLo = 0; /* Dram Timing Low init */ +- val = pDCTstat->CASL - 4; /* pDCTstat.CASL to reg. 
definition */ +- DramTimingLo |= val; ++ if (is_fam15h()) { ++ dev = pDCTstat->dev_dct; + +- val = pDCTstat->Trcd - Bias_TrcdT; +- DramTimingLo |= val<<4; ++ dword = Get_NB32_DCT(dev, dct, 0x8c); /* DRAM Timing High */ ++ val = 2; /* Tref = 7.8us */ ++ dword &= ~(0x3 << 16); ++ dword |= (val & 0x3) << 16; ++ Set_NB32_DCT(dev, dct, 0x8c, dword); /* DRAM Timing High */ ++ ++ dword = Get_NB32_DCT(dev, dct, 0x200); /* DRAM Timing 0 */ ++ dword &= ~(0x3f1f1f1f); ++ dword |= ((pDCTstat->Tras + 0xf) & 0x3f) << 24; /* Tras */ ++ dword |= ((pDCTstat->Trp + 0x5) & 0x1f) << 16; /* Trp */ ++ dword |= ((pDCTstat->Trcd + 0x5) & 0x1f) << 8; /* Trcd */ ++ dword |= (pDCTstat->CASL & 0x1f); /* Tcl */ ++ Set_NB32_DCT(dev, dct, 0x200, dword); /* DRAM Timing 0 */ ++ ++ dword = Get_NB32_DCT(dev, dct, 0x204); /* DRAM Timing 1 */ ++ dword &= ~(0x0f3f0f3f); ++ dword |= ((pDCTstat->Trtp + 0x4) & 0xf) << 24; /* Trtp */ ++ if (pDCTstat->Tfaw != 0) ++ dword |= ((((pDCTstat->Tfaw - 0x1) * 2) + 0x10) & 0x3f) << 16; /* FourActWindow */ ++ dword |= ((pDCTstat->Trrd + 0x4) & 0xf) << 8; /* Trrd */ ++ dword |= ((pDCTstat->Trc + 0xb) & 0x3f); /* Trc */ ++ Set_NB32_DCT(dev, dct, 0x204, dword); /* DRAM Timing 1 */ ++ ++ dword = Get_NB32_DCT(dev, dct, 0x208); /* DRAM Timing 2 */ ++ dword &= ~(0x07070707); ++ dword |= (pDCTstat->Trfc[3] & 0x7) << 24; /* Trfc3 */ ++ dword |= (pDCTstat->Trfc[2] & 0x7) << 16; /* Trfc2 */ ++ dword |= (pDCTstat->Trfc[1] & 0x7) << 8; /* Trfc1 */ ++ dword |= (pDCTstat->Trfc[0] & 0x7); /* Trfc0 */ ++ Set_NB32_DCT(dev, dct, 0x208, dword); /* DRAM Timing 2 */ ++ ++ dword = Get_NB32_DCT(dev, dct, 0x20c); /* DRAM Timing 3 */ ++ dword &= ~(0x00000f00); ++ dword |= ((pDCTstat->Twtr + 0x4) & 0xf) << 8; /* Twtr */ ++ dword &= ~(0x0000001f); ++ dword |= (Tcwl & 0x1f); /* Tcwl */ ++ Set_NB32_DCT(dev, dct, 0x20c, dword); /* DRAM Timing 3 */ ++ ++ dword = Get_NB32_DCT(dev, dct, 0x22c); /* DRAM Timing 10 */ ++ dword &= ~(0x0000001f); ++ dword |= ((pDCTstat->Twr + 0x4) & 0x1f); /* Twr */ ++ Set_NB32_DCT(dev, dct, 0x22c, dword); /* DRAM Timing 10 */ ++ ++ if (pDCTstat->Speed > mhz_to_memclk_config(mctGet_NVbits(NV_MIN_MEMCLK))) { ++ /* Enable phy-assisted training mode */ ++ fam15EnableTrainingMode(pMCTstat, pDCTstat, dct, 1); ++ } + +- val = pDCTstat->Trp - Bias_TrpT; +- val = mct_AdjustSPDTimings(pMCTstat, pDCTstat, val); +- DramTimingLo |= val<<7; ++ /* Other setup (not training specific) */ ++ dword = Get_NB32_DCT(dev, dct, 0x90); /* DRAM Configuration Low */ ++ dword &= ~(0x1 << 23); /* ForceAutoPchg = 0 */ ++ dword &= ~(0x1 << 20); /* DynPageCloseEn = 0 */ ++ Set_NB32_DCT(dev, dct, 0x90, dword); /* DRAM Configuration Low */ + +- val = pDCTstat->Trtp - Bias_TrtpT; +- DramTimingLo |= val<<10; ++ Set_NB32_DCT(dev, dct, 0x228, 0x14141414); /* DRAM Timing 9 */ ++ } else { ++ DramTimingLo = 0; /* Dram Timing Low init */ ++ val = pDCTstat->CASL - 4; /* pDCTstat.CASL to reg. 
definition */ ++ DramTimingLo |= val; + +- val = pDCTstat->Tras - Bias_TrasT; +- DramTimingLo |= val<<12; ++ val = pDCTstat->Trcd - Bias_TrcdT; ++ DramTimingLo |= val<<4; + +- val = pDCTstat->Trc - Bias_TrcT; +- DramTimingLo |= val<<16; ++ val = pDCTstat->Trp - Bias_TrpT; ++ val = mct_AdjustSPDTimings(pMCTstat, pDCTstat, val); ++ DramTimingLo |= val<<7; + +- val = pDCTstat->Trrd - Bias_TrrdT; +- DramTimingLo |= val<<22; ++ val = pDCTstat->Trtp - Bias_TrtpT; ++ DramTimingLo |= val<<10; + +- DramTimingHi = 0; /* Dram Timing High init */ +- val = pDCTstat->Twtr - Bias_TwtrT; +- DramTimingHi |= val<<8; ++ val = pDCTstat->Tras - Bias_TrasT; ++ DramTimingLo |= val<<12; + +- val = 2; +- DramTimingHi |= val<<16; ++ val = pDCTstat->Trc - Bias_TrcT; ++ DramTimingLo |= val<<16; + +- val = 0; +- for (i=4;i>0;i--) { +- val <<= 3; +- val |= Trfc[i-1]; +- } +- DramTimingHi |= val << 20; ++ val = pDCTstat->Trrd - Bias_TrrdT; ++ DramTimingLo |= val<<22; + +- dev = pDCTstat->dev_dct; +- reg_off = 0x100 * dct; +- /* Twr */ +- val = pDCTstat->Twr; +- if (val == 10) +- val = 9; +- else if (val == 12) +- val = 10; +- val = mct_AdjustSPDTimings(pMCTstat, pDCTstat, val); +- val -= Bias_TwrT; +- val <<= 4; +- dword = Get_NB32(dev, 0x84 + reg_off); +- dword &= ~0x70; +- dword |= val; +- Set_NB32(dev, 0x84 + reg_off, dword); ++ DramTimingHi = 0; /* Dram Timing High init */ ++ val = pDCTstat->Twtr - Bias_TwtrT; ++ DramTimingHi |= val<<8; + +- /* Tfaw */ +- val = pDCTstat->Tfaw; +- val = mct_AdjustSPDTimings(pMCTstat, pDCTstat, val); +- val -= Bias_TfawT; +- val >>= 1; +- val <<= 28; +- dword = Get_NB32(dev, 0x94 + reg_off); +- dword &= ~0xf0000000; +- dword |= val; +- Set_NB32(dev, 0x94 + reg_off, dword); +- +- /* dev = pDCTstat->dev_dct; */ +- /* reg_off = 0x100 * dct; */ +- +- if (pDCTstat->Speed > 4) { +- val = Get_NB32(dev, 0x88 + reg_off); +- val &= 0xFF000000; +- DramTimingLo |= val; +- } +- Set_NB32(dev, 0x88 + reg_off, DramTimingLo); /*DCT Timing Low*/ ++ val = 2; /* Tref = 7.8us */ ++ DramTimingHi |= val<<16; ++ ++ val = 0; ++ for (i=4;i>0;i--) { ++ val <<= 3; ++ val |= Trfc[i-1]; ++ } ++ DramTimingHi |= val << 20; ++ ++ dev = pDCTstat->dev_dct; ++ /* Twr */ ++ val = pDCTstat->Twr; ++ if (val == 10) ++ val = 9; ++ else if (val == 12) ++ val = 10; ++ val = mct_AdjustSPDTimings(pMCTstat, pDCTstat, val); ++ val -= Bias_TwrT; ++ val <<= 4; ++ dword = Get_NB32_DCT(dev, dct, 0x84); ++ dword &= ~0x70; ++ dword |= val; ++ Set_NB32_DCT(dev, dct, 0x84, dword); ++ ++ /* Tfaw */ ++ val = pDCTstat->Tfaw; ++ val = mct_AdjustSPDTimings(pMCTstat, pDCTstat, val); ++ val -= Bias_TfawT; ++ val >>= 1; ++ val <<= 28; ++ dword = Get_NB32_DCT(dev, dct, 0x94); ++ dword &= ~0xf0000000; ++ dword |= val; ++ Set_NB32_DCT(dev, dct, 0x94, dword); ++ ++ /* dev = pDCTstat->dev_dct; */ ++ ++ if (pDCTstat->Speed > mhz_to_memclk_config(mctGet_NVbits(NV_MIN_MEMCLK))) { ++ val = Get_NB32_DCT(dev, dct, 0x88); ++ val &= 0xFF000000; ++ DramTimingLo |= val; ++ } ++ Set_NB32_DCT(dev, dct, 0x88, DramTimingLo); /*DCT Timing Low*/ + +- if (pDCTstat->Speed > 4) { +- DramTimingHi |= 1 << DisAutoRefresh; ++ if (pDCTstat->Speed > mhz_to_memclk_config(mctGet_NVbits(NV_MIN_MEMCLK))) { ++ DramTimingHi |= 1 << DisAutoRefresh; ++ } ++ DramTimingHi |= 0x000018FF; ++ Set_NB32_DCT(dev, dct, 0x8c, DramTimingHi); /*DCT Timing Hi*/ + } +- DramTimingHi |= 0x000018FF; +- Set_NB32(dev, 0x8c + reg_off, DramTimingHi); /*DCT Timing Hi*/ + + /* dump_pci_device(PCI_DEV(0, 0x18+pDCTstat->Node_ID, 2)); */ ++ ++ printk(BIOS_DEBUG, "%s: Done\n", __func__); + } + + static u8 
AutoCycTiming_D(struct MCTStatStruc *pMCTstat, +@@ -1303,6 +2840,8 @@ static u8 AutoCycTiming_D(struct MCTStatStruc *pMCTstat, + * timing mode is 'Auto'. + */ + ++ printk(BIOS_DEBUG, "%s: Start\n", __func__); ++ + /* Get primary timing (CAS Latency and Cycle Time) */ + if (pDCTstat->Speed == 0) { + mctGet_MaxLoadFreq(pDCTstat); +@@ -1312,6 +2851,7 @@ static u8 AutoCycTiming_D(struct MCTStatStruc *pMCTstat, + + /* Go get best T and CL as specified by DIMM mfgs. and OEM */ + SPDGetTCL_D(pMCTstat, pDCTstat, dct); ++ + /* skip callback mctForce800to1067_D */ + pDCTstat->Speed = pDCTstat->DIMMAutoSpeed; + pDCTstat->CASL = pDCTstat->DIMMCASL; +@@ -1344,7 +2884,10 @@ static void GetPresetmaxF_D(struct MCTStatStruc *pMCTstat, + u16 word; + + /* Get CPU Si Revision defined limit (NPT) */ +- proposedFreq = 800; /* Rev F0 programmable max memclock is */ ++ if (is_fam15h()) ++ proposedFreq = 933; ++ else ++ proposedFreq = 800; /* Rev F0 programmable max memclock is */ + + /*Get User defined limit if "limit" mode */ + if ( mctGet_NVbits(NV_MCTUSRTMGMODE) == 1) { +@@ -1381,6 +2924,7 @@ static void SPDGetTCL_D(struct MCTStatStruc *pMCTstat, + u16 tCKmin16x; + u16 tCKproposed16x; + u8 CLactual, CLdesired, CLT_Fail; ++ uint16_t min_frequency_tck16x; + + u8 smbaddr, byte = 0, bytex = 0; + +@@ -1390,6 +2934,17 @@ static void SPDGetTCL_D(struct MCTStatStruc *pMCTstat, + tCKmin16x = 0; + CLT_Fail = 0; + ++ printk(BIOS_DEBUG, "%s: Start\n", __func__); ++ ++ if (is_fam15h()) { ++ uint16_t minimum_frequency_mhz = mctGet_NVbits(NV_MIN_MEMCLK); ++ if (minimum_frequency_mhz == 0) ++ minimum_frequency_mhz = 333; ++ min_frequency_tck16x = 16000 / minimum_frequency_mhz; ++ } else { ++ min_frequency_tck16x = 40; ++ } ++ + for (i = 0; i < MAX_DIMMS_SUPPORTED; i++) { + if (pDCTstat->DIMMValid & (1 << i)) { + smbaddr = Get_DIMMAddress_D(pDCTstat, (dct + i)); +@@ -1419,27 +2974,44 @@ static void SPDGetTCL_D(struct MCTStatStruc *pMCTstat, + tCKmin16x = byte * MTB16x; + } + } +- /* calculate tCKproposed16x */ ++ /* calculate tCKproposed16x (proposed clock period in ns * 16) */ + tCKproposed16x = 16000 / pDCTstat->PresetmaxFreq; + if (tCKmin16x > tCKproposed16x) + tCKproposed16x = tCKmin16x; + +- /* mctHookTwo1333DimmOverride(); */ +- /* For UDIMM, if there are two DDR3-1333 on the same channel, +- downgrade DDR speed to 1066. */ +- + /* TODO: get user manual tCK16x(Freq.) and overwrite current tCKproposed16x if manual. 
*/ +- if (tCKproposed16x == 20) +- pDCTstat->TargetFreq = 7; +- else if (tCKproposed16x <= 24) { +- pDCTstat->TargetFreq = 6; +- tCKproposed16x = 24; +- } else if (tCKproposed16x <= 30) { +- pDCTstat->TargetFreq = 5; +- tCKproposed16x = 30; ++ if (is_fam15h()) { ++ if (tCKproposed16x == 17) ++ pDCTstat->TargetFreq = 0x16; ++ else if (tCKproposed16x <= 20) { ++ pDCTstat->TargetFreq = 0x12; ++ tCKproposed16x = 20; ++ } else if (tCKproposed16x <= 24) { ++ pDCTstat->TargetFreq = 0xe; ++ tCKproposed16x = 24; ++ } else if (tCKproposed16x <= 30) { ++ pDCTstat->TargetFreq = 0xa; ++ tCKproposed16x = 30; ++ } else if (tCKproposed16x <= 40) { ++ pDCTstat->TargetFreq = 0x6; ++ tCKproposed16x = 40; ++ } else { ++ pDCTstat->TargetFreq = 0x4; ++ tCKproposed16x = 48; ++ } + } else { +- pDCTstat->TargetFreq = 4; +- tCKproposed16x = 40; ++ if (tCKproposed16x == 20) ++ pDCTstat->TargetFreq = 7; ++ else if (tCKproposed16x <= 24) { ++ pDCTstat->TargetFreq = 6; ++ tCKproposed16x = 24; ++ } else if (tCKproposed16x <= 30) { ++ pDCTstat->TargetFreq = 5; ++ tCKproposed16x = 30; ++ } else { ++ pDCTstat->TargetFreq = 4; ++ tCKproposed16x = 40; ++ } + } + /* Running through this loop twice: + - First time find tCL at target frequency +@@ -1478,27 +3050,42 @@ static void SPDGetTCL_D(struct MCTStatStruc *pMCTstat, + /* get CL and T */ + if (!CLT_Fail) { + bytex = CLactual; +- if (tCKproposed16x == 20) +- byte = 7; +- else if (tCKproposed16x == 24) +- byte = 6; +- else if (tCKproposed16x == 30) +- byte = 5; +- else +- byte = 4; ++ if (is_fam15h()) { ++ if (tCKproposed16x == 17) ++ byte = 0x16; ++ else if (tCKproposed16x == 20) ++ byte = 0x12; ++ else if (tCKproposed16x == 24) ++ byte = 0xe; ++ else if (tCKproposed16x == 30) ++ byte = 0xa; ++ else if (tCKproposed16x == 40) ++ byte = 0x6; ++ else ++ byte = 0x4; ++ } else { ++ if (tCKproposed16x == 20) ++ byte = 7; ++ else if (tCKproposed16x == 24) ++ byte = 6; ++ else if (tCKproposed16x == 30) ++ byte = 5; ++ else ++ byte = 4; ++ } + } else { + /* mctHookManualCLOverride */ + /* TODO: */ + } + +- if (tCKproposed16x != 40) { ++ if (tCKproposed16x != min_frequency_tck16x) { + if (pMCTstat->GStatus & (1 << GSB_EnDIMMSpareNW)) { + pDCTstat->DIMMAutoSpeed = byte; + pDCTstat->DIMMCASL = bytex; + break; + } else { + pDCTstat->TargetCASL = bytex; +- tCKproposed16x = 40; ++ tCKproposed16x = min_frequency_tck16x; + } + } else { + pDCTstat->DIMMAutoSpeed = byte; +@@ -1519,29 +3106,21 @@ static void SPDGetTCL_D(struct MCTStatStruc *pMCTstat, + static u8 PlatformSpec_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct) + { +- u32 dev; +- u32 reg; +- u32 val; ++ if (!is_fam15h()) { ++ mctGet_PS_Cfg_D(pMCTstat, pDCTstat, dct); + +- mctGet_PS_Cfg_D(pMCTstat, pDCTstat, dct); ++ if (pDCTstat->GangedMode == 1) { ++ mctGet_PS_Cfg_D(pMCTstat, pDCTstat, 1); ++ mct_BeforePlatformSpec(pMCTstat, pDCTstat, 1); ++ } + +- if (pDCTstat->GangedMode == 1) { +- mctGet_PS_Cfg_D(pMCTstat, pDCTstat, 1); +- mct_BeforePlatformSpec(pMCTstat, pDCTstat, 1); +- } ++ set_2t_configuration(pMCTstat, pDCTstat, dct); + +- if ( pDCTstat->_2Tmode == 2) { +- dev = pDCTstat->dev_dct; +- reg = 0x94 + 0x100 * dct; /* Dram Configuration Hi */ +- val = Get_NB32(dev, reg); +- val |= 1 << 20; /* 2T CMD mode */ +- Set_NB32(dev, reg, val); ++ mct_BeforePlatformSpec(pMCTstat, pDCTstat, dct); ++ mct_PlatformSpec(pMCTstat, pDCTstat, dct); ++ if (pDCTstat->DIMMAutoSpeed == mhz_to_memclk_config(mctGet_NVbits(NV_MIN_MEMCLK))) ++ InitPhyCompensation(pMCTstat, pDCTstat, dct); + } +- +- 
mct_BeforePlatformSpec(pMCTstat, pDCTstat, dct); +- mct_PlatformSpec(pMCTstat, pDCTstat, dct); +- if (pDCTstat->DIMMAutoSpeed == 4) +- InitPhyCompensation(pMCTstat, pDCTstat, dct); + mctHookAfterPSCfg(); + + return pDCTstat->ErrCode; +@@ -1553,11 +3132,11 @@ static u8 AutoConfig_D(struct MCTStatStruc *pMCTstat, + u32 DramControl, DramTimingLo, Status; + u32 DramConfigLo, DramConfigHi, DramConfigMisc, DramConfigMisc2; + u32 val; +- u32 reg_off; + u32 dev; + u16 word; + u32 dword; + u8 byte; ++ uint32_t offset; + + DramConfigLo = 0; + DramConfigHi = 0; +@@ -1577,12 +3156,10 @@ static u8 AutoConfig_D(struct MCTStatStruc *pMCTstat, + Status = pDCTstat->Status; + + dev = pDCTstat->dev_dct; +- reg_off = 0x100 * dct; +- + + /* Build Dram Control Register Value */ +- DramConfigMisc2 = Get_NB32 (dev, 0xA8 + reg_off); /* Dram Control*/ +- DramControl = Get_NB32 (dev, 0x78 + reg_off); /* Dram Control*/ ++ DramConfigMisc2 = Get_NB32_DCT(dev, dct, 0xA8); /* Dram Control*/ ++ DramControl = Get_NB32_DCT(dev, dct, 0x78); /* Dram Control*/ + + /* FIXME: Skip mct_checkForDxSupport */ + /* REV_CALL mct_DoRdPtrInit if not Dx */ +@@ -1624,8 +3201,12 @@ static u8 AutoConfig_D(struct MCTStatStruc *pMCTstat, + DramConfigLo = mct_DisDllShutdownSR(pMCTstat, pDCTstat, DramConfigLo, dct); + + /* Build Dram Config Hi Register Value */ ++ if (is_fam15h()) ++ offset = 0x0; ++ else ++ offset = 0x1; + dword = pDCTstat->Speed; +- DramConfigHi |= dword - 1; /* get MemClk encoding */ ++ DramConfigHi |= dword - offset; /* get MemClk encoding */ + DramConfigHi |= 1 << MemClkFreqVal; + + if (Status & (1 << SB_Registered)) +@@ -1658,7 +3239,7 @@ static u8 AutoConfig_D(struct MCTStatStruc *pMCTstat, + val = 0x0f; /* recommended setting (default) */ + DramConfigHi |= val << 24; + +- if (pDCTstat->LogicalCPUID & (AMD_DR_Dx | AMD_DR_Cx | AMD_DR_Bx)) ++ if (pDCTstat->LogicalCPUID & (AMD_DR_Dx | AMD_DR_Cx | AMD_DR_Bx | AMD_FAM15_ALL)) + DramConfigHi |= 1 << DcqArbBypassEn; + + /* Build MemClkDis Value from Dram Timing Lo and +@@ -1669,7 +3250,7 @@ static u8 AutoConfig_D(struct MCTStatStruc *pMCTstat, + NV_AllMemClks <>0 AND SB_DiagClks ==0 */ + + /* Dram Timing Low (owns Clock Enable bits) */ +- DramTimingLo = Get_NB32(dev, 0x88 + reg_off); ++ DramTimingLo = Get_NB32_DCT(dev, dct, 0x88); + if (mctGet_NVbits(NV_AllMemClks) == 0) { + /* Special Jedec SPD diagnostic bit - "enable all clocks" */ + if (!(pDCTstat->Status & (1<<SB_DiagClks))) { +@@ -1700,28 +3281,34 @@ static u8 AutoConfig_D(struct MCTStatStruc *pMCTstat, + } + dword++ ; + } ++ DramTimingLo &= ~(0xff << 24); + DramTimingLo |= byte << 24; + } + } + +- printk(BIOS_DEBUG, "AutoConfig_D: DramControl: %x\n", DramControl); +- printk(BIOS_DEBUG, "AutoConfig_D: DramTimingLo: %x\n", DramTimingLo); +- printk(BIOS_DEBUG, "AutoConfig_D: DramConfigMisc: %x\n", DramConfigMisc); +- printk(BIOS_DEBUG, "AutoConfig_D: DramConfigMisc2: %x\n", DramConfigMisc2); +- printk(BIOS_DEBUG, "AutoConfig_D: DramConfigLo: %x\n", DramConfigLo); +- printk(BIOS_DEBUG, "AutoConfig_D: DramConfigHi: %x\n", DramConfigHi); ++ printk(BIOS_DEBUG, "AutoConfig_D: DramControl: %08x\n", DramControl); ++ printk(BIOS_DEBUG, "AutoConfig_D: DramTimingLo: %08x\n", DramTimingLo); ++ printk(BIOS_DEBUG, "AutoConfig_D: DramConfigMisc: %08x\n", DramConfigMisc); ++ printk(BIOS_DEBUG, "AutoConfig_D: DramConfigMisc2: %08x\n", DramConfigMisc2); ++ printk(BIOS_DEBUG, "AutoConfig_D: DramConfigLo: %08x\n", DramConfigLo); ++ printk(BIOS_DEBUG, "AutoConfig_D: DramConfigHi: %08x\n", DramConfigHi); + + /* Write Values to the registers */ 
+- Set_NB32(dev, 0x78 + reg_off, DramControl); +- Set_NB32(dev, 0x88 + reg_off, DramTimingLo); +- Set_NB32(dev, 0xA0 + reg_off, DramConfigMisc); ++ Set_NB32_DCT(dev, dct, 0x78, DramControl); ++ Set_NB32_DCT(dev, dct, 0x88, DramTimingLo); ++ Set_NB32_DCT(dev, dct, 0xa0, DramConfigMisc); + DramConfigMisc2 = mct_SetDramConfigMisc2(pDCTstat, dct, DramConfigMisc2); +- Set_NB32(dev, 0xA8 + reg_off, DramConfigMisc2); +- Set_NB32(dev, 0x90 + reg_off, DramConfigLo); ++ Set_NB32_DCT(dev, dct, 0xa8, DramConfigMisc2); ++ Set_NB32_DCT(dev, dct, 0x90, DramConfigLo); + ProgDramMRSReg_D(pMCTstat, pDCTstat, dct); +- dword = Get_NB32(dev, 0x94 + reg_off); ++ ++ if (is_fam15h()) ++ InitDDRPhy(pMCTstat, pDCTstat, dct); ++ ++ /* Write the DRAM Configuration High register, including memory frequency change */ ++ dword = Get_NB32_DCT(dev, dct, 0x94); + DramConfigHi |= dword; +- mct_SetDramConfigHi_D(pDCTstat, dct, DramConfigHi); ++ mct_SetDramConfigHi_D(pMCTstat, pDCTstat, dct, DramConfigHi); + mct_EarlyArbEn_D(pMCTstat, pDCTstat, dct); + mctHookAfterAutoCfg(); + +@@ -1731,6 +3318,7 @@ static u8 AutoConfig_D(struct MCTStatStruc *pMCTstat, + printk(BIOS_DEBUG, "AutoConfig: ErrStatus %x\n", pDCTstat->ErrStatus); + printk(BIOS_DEBUG, "AutoConfig: ErrCode %x\n", pDCTstat->ErrCode); + printk(BIOS_DEBUG, "AutoConfig: Done\n\n"); ++ + AutoConfig_exit: + return pDCTstat->ErrCode; + } +@@ -1748,14 +3336,12 @@ static void SPDSetBanks_D(struct MCTStatStruc *pMCTstat, + u32 val; + u32 reg; + u32 dev; +- u32 reg_off; + u8 byte; + u16 word; + u32 dword; + u16 smbaddr; + + dev = pDCTstat->dev_dct; +- reg_off = 0x100 * dct; + + BankAddrReg = 0; + for (ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel+=2) { +@@ -1820,10 +3406,10 @@ static void SPDSetBanks_D(struct MCTStatStruc *pMCTstat, + /*set ChipSelect population indicator odd bits*/ + pDCTstat->CSPresent |= 1 << (ChipSel + 1); + +- reg = 0x60+(ChipSel<<1) + reg_off; /*Dram CS Mask Register */ ++ reg = 0x60+(ChipSel<<1); /*Dram CS Mask Register */ + val = csMask; + val &= 0x1FF83FE0; /* Mask out reserved bits.*/ +- Set_NB32(dev, reg, val); ++ Set_NB32_DCT(dev, dct, reg, val); + } else { + if (pDCTstat->DIMMSPDCSE & (1<<ChipSel)) + pDCTstat->CSTestFail |= (1<<ChipSel); +@@ -1847,8 +3433,8 @@ static void SPDSetBanks_D(struct MCTStatStruc *pMCTstat, + if (!pDCTstat->CSPresent) + pDCTstat->ErrCode = SC_StopError; + +- reg = 0x80 + reg_off; /* Bank Addressing Register */ +- Set_NB32(dev, reg, BankAddrReg); ++ reg = 0x80; /* Bank Addressing Register */ ++ Set_NB32_DCT(dev, dct, reg, BankAddrReg); + + pDCTstat->CSPresent_DCT[dct] = pDCTstat->CSPresent; + /* dump_pci_device(PCI_DEV(0, 0x18+pDCTstat->Node_ID, 2)); */ +@@ -1933,11 +3519,9 @@ static void StitchMemory_D(struct MCTStatStruc *pMCTstat, + u16 word; + u32 dev; + u32 reg; +- u32 reg_off; + u32 val; + + dev = pDCTstat->dev_dct; +- reg_off = 0x100 * dct; + + _DSpareEn = 0; + +@@ -1974,11 +3558,11 @@ static void StitchMemory_D(struct MCTStatStruc *pMCTstat, + BiggestBank = 0; + for (q = 0; q < MAX_CS_SUPPORTED; q++) { /* from DIMMS to CS */ + if (pDCTstat->CSPresent & (1 << q)) { /* bank present? */ +- reg = 0x40 + (q << 2) + reg_off; /* Base[q] reg.*/ +- val = Get_NB32(dev, reg); ++ reg = 0x40 + (q << 2); /* Base[q] reg.*/ ++ val = Get_NB32_DCT(dev, dct, reg); + if (!(val & 3)) { /* (CSEnable|Spare==1)bank is enabled already? 
*/ +- reg = 0x60 + (q << 1) + reg_off; /*Mask[q] reg.*/ +- val = Get_NB32(dev, reg); ++ reg = 0x60 + (q << 1); /*Mask[q] reg.*/ ++ val = Get_NB32_DCT(dev, dct, reg); + val >>= 19; + val++; + val <<= 19; +@@ -1994,7 +3578,7 @@ static void StitchMemory_D(struct MCTStatStruc *pMCTstat, + if (BiggestBank !=0) { + curcsBase = nxtcsBase; /* curcsBase=nxtcsBase*/ + /* DRAM CS Base b Address Register offset */ +- reg = 0x40 + (b << 2) + reg_off; ++ reg = 0x40 + (b << 2); + if (_DSpareEn) { + BiggestBank = 0; + val = 1 << Spare; /* Spare Enable*/ +@@ -2013,7 +3597,7 @@ static void StitchMemory_D(struct MCTStatStruc *pMCTstat, + } + } + } +- Set_NB32(dev, reg, val); ++ Set_NB32_DCT(dev, dct, reg, val); + if (_DSpareEn) + _DSpareEn = 0; + else +@@ -2024,9 +3608,9 @@ static void StitchMemory_D(struct MCTStatStruc *pMCTstat, + /* bank present but disabled?*/ + if ( pDCTstat->CSTestFail & (1 << p)) { + /* DRAM CS Base b Address Register offset */ +- reg = (p << 2) + 0x40 + reg_off; ++ reg = (p << 2) + 0x40; + val = 1 << TestFail; +- Set_NB32(dev, reg, val); ++ Set_NB32_DCT(dev, dct, reg, val); + } + } + +@@ -2064,7 +3648,7 @@ static u8 DIMMPresence_D(struct MCTStatStruc *pMCTstat, + u16 i, j, k; + u8 smbaddr; + u8 SPDCtrl; +- u16 RegDIMMPresent, MaxDimms; ++ u16 RegDIMMPresent, LRDIMMPresent, MaxDimms; + u8 devwidth; + u16 DimmSlots; + u8 byte = 0, bytex; +@@ -2077,6 +3661,7 @@ static u8 DIMMPresence_D(struct MCTStatStruc *pMCTstat, + SPDCtrl = mctGet_NVbits(NV_SPDCHK_RESTRT); + + RegDIMMPresent = 0; ++ LRDIMMPresent = 0; + pDCTstat->DimmQRPresent = 0; + + for (i = 0; i < MAX_DIMMS_SUPPORTED; i++) { +@@ -2115,6 +3700,7 @@ static u8 DIMMPresence_D(struct MCTStatStruc *pMCTstat, + pDCTstat->DimmManufacturerID[i] |= ((uint64_t)mctRead_SPD(smbaddr, SPD_MANID_START + k)) << (k * 8); + for (k = 0; k < SPD_PARTN_LENGTH; k++) + pDCTstat->DimmPartNumber[i][k] = mctRead_SPD(smbaddr, SPD_PARTN_START + k); ++ pDCTstat->DimmPartNumber[i][SPD_PARTN_LENGTH] = 0; + pDCTstat->DimmRevisionNumber[i] = 0; + for (k = 0; k < 2; k++) + pDCTstat->DimmRevisionNumber[i] |= ((uint16_t)mctRead_SPD(smbaddr, SPD_REVNO_START + k)) << (k * 8); +@@ -2138,6 +3724,12 @@ static u8 DIMMPresence_D(struct MCTStatStruc *pMCTstat, + } else { + pDCTstat->DimmRegistered[i] = 0; + } ++ if (byte == JED_LRDIMM) { ++ LRDIMMPresent |= 1 << i; ++ pDCTstat->DimmLoadReduced[i] = 1; ++ } else { ++ pDCTstat->DimmLoadReduced[i] = 0; ++ } + /* Check ECC capable */ + byte = mctRead_SPD(smbaddr, SPD_BusWidth); + if (byte & JED_ECC) { +@@ -2221,6 +3813,7 @@ static u8 DIMMPresence_D(struct MCTStatStruc *pMCTstat, + printk(BIOS_DEBUG, "\t DIMMPresence: DIMMValid=%x\n", pDCTstat->DIMMValid); + printk(BIOS_DEBUG, "\t DIMMPresence: DIMMPresent=%x\n", pDCTstat->DIMMPresent); + printk(BIOS_DEBUG, "\t DIMMPresence: RegDIMMPresent=%x\n", RegDIMMPresent); ++ printk(BIOS_DEBUG, "\t DIMMPresence: LRDIMMPresent=%x\n", LRDIMMPresent); + printk(BIOS_DEBUG, "\t DIMMPresence: DimmECCPresent=%x\n", pDCTstat->DimmECCPresent); + printk(BIOS_DEBUG, "\t DIMMPresence: DimmPARPresent=%x\n", pDCTstat->DimmPARPresent); + printk(BIOS_DEBUG, "\t DIMMPresence: Dimmx4Present=%x\n", pDCTstat->Dimmx4Present); +@@ -2247,6 +3840,16 @@ static u8 DIMMPresence_D(struct MCTStatStruc *pMCTstat, + pDCTstat->Status |= 1<<SB_Registered; + } + } ++ if (LRDIMMPresent != 0) { ++ if ((LRDIMMPresent ^ pDCTstat->DIMMValid) !=0) { ++ /* module type DIMM mismatch (reg'ed, unbuffered) */ ++ pDCTstat->ErrStatus |= 1<<SB_DimmMismatchM; ++ pDCTstat->ErrCode = SC_StopError; ++ } else{ ++ /* all DIMMs are 
registered */ ++ pDCTstat->Status |= 1<<SB_LoadReduced; ++ } ++ } + if (pDCTstat->DimmECCPresent != 0) { + if ((pDCTstat->DimmECCPresent ^ pDCTstat->DIMMValid )== 0) { + /* all DIMMs are ECC capable */ +@@ -2284,6 +3887,26 @@ static u8 Get_DIMMAddress_D(struct DCTStatStruc *pDCTstat, u8 i) + return p[i]; + } + ++static void mct_preInitDCT(struct MCTStatStruc *pMCTstat, ++ struct DCTStatStruc *pDCTstat) ++{ ++ u8 err_code; ++ ++ /* Preconfigure DCT0 */ ++ DCTPreInit_D(pMCTstat, pDCTstat, 0); ++ ++ /* Configure DCT1 if unganged and enabled*/ ++ if (!pDCTstat->GangedMode) { ++ if (pDCTstat->DIMMValidDCT[1] > 0) { ++ err_code = pDCTstat->ErrCode; /* save DCT0 errors */ ++ pDCTstat->ErrCode = 0; ++ DCTPreInit_D(pMCTstat, pDCTstat, 1); ++ if (pDCTstat->ErrCode == 2) /* DCT1 is not Running */ ++ pDCTstat->ErrCode = err_code; /* Using DCT0 Error code to update pDCTstat.ErrCode */ ++ } ++ } ++} ++ + static void mct_initDCT(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat) + { +@@ -2295,7 +3918,7 @@ static void mct_initDCT(struct MCTStatStruc *pMCTstat, + if (pDCTstat->ErrCode == SC_FatalErr) { + /* Do nothing goto exitDCTInit; any fatal errors? */ + } else { +- /* Configure DCT1 if unganged and enabled*/ ++ /* Configure DCT1 if unganged and enabled */ + if (!pDCTstat->GangedMode) { + if (pDCTstat->DIMMValidDCT[1] > 0) { + err_code = pDCTstat->ErrCode; /* save DCT0 errors */ +@@ -2305,17 +3928,21 @@ static void mct_initDCT(struct MCTStatStruc *pMCTstat, + pDCTstat->ErrCode = err_code; /* Using DCT0 Error code to update pDCTstat.ErrCode */ + } else { + val = 1 << DisDramInterface; +- Set_NB32(pDCTstat->dev_dct, 0x100 + 0x94, val); ++ Set_NB32_DCT(pDCTstat->dev_dct, 1, 0x94, val); ++ ++ /* To maximize power savings when DisDramInterface=1b, ++ * all of the MemClkDis bits should also be set. ++ */ ++ Set_NB32_DCT(pDCTstat->dev_dct, 1, 0x88, 0xff000000); + } + } + } +-/* exitDCTInit: */ + } + + static void mct_DramInit(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct) + { +- mct_BeforeDramInit_Prod_D(pMCTstat, pDCTstat); ++ mct_BeforeDramInit_Prod_D(pMCTstat, pDCTstat, dct); + mct_DramInit_Sw_D(pMCTstat, pDCTstat, dct); + /* mct_DramInit_Hw_D(pMCTstat, pDCTstat, dct); */ + } +@@ -2343,7 +3970,8 @@ static u8 mct_setMode(struct MCTStatStruc *pMCTstat, + if (byte) + pDCTstat->ErrStatus |= (1 << SB_DimmMismatchO); /* Set temp. to avoid setting of ganged mode */ + +- if (!(pDCTstat->ErrStatus & (1 << SB_DimmMismatchO))) { ++ if ((!(pDCTstat->ErrStatus & (1 << SB_DimmMismatchO))) && (pDCTstat->LogicalCPUID & AMD_FAM10_ALL)) { ++ /* Ganged channel mode not supported on Family 15h or higher */ + pDCTstat->GangedMode = 1; + /* valid 128-bit mode population. */ + pDCTstat->Status |= 1 << SB_128bitmode; +@@ -2387,10 +4015,8 @@ void Set_NB32_index(u32 dev, u32 index_reg, u32 index, u32 data) + + u32 Get_NB32_index_wait(u32 dev, u32 index_reg, u32 index) + { +- + u32 dword; + +- + index &= ~(1 << DctAccessWrite); + Set_NB32(dev, index_reg, index); + do { +@@ -2405,7 +4031,6 @@ void Set_NB32_index_wait(u32 dev, u32 index_reg, u32 index, u32 data) + { + u32 dword; + +- + Set_NB32(dev, index_reg + 0x4, data); + index |= (1 << DctAccessWrite); + Set_NB32(dev, index_reg, index); +@@ -2420,16 +4045,17 @@ static u8 mct_BeforePlatformSpec(struct MCTStatStruc *pMCTstat, + { + /* mct_checkForCxDxSupport_D */ + if (pDCTstat->LogicalCPUID & AMD_DR_GT_Bx) { ++ /* Family 10h Errata 322: Address and Command Fine Delay Values May Be Incorrect */ + /* 1. 
Write 00000000h to F2x[1,0]9C_xD08E000 */ +- Set_NB32_index_wait(pDCTstat->dev_dct, 0x98 + dct * 0x100, 0x0D08E000, 0); ++ Set_NB32_index_wait_DCT(pDCTstat->dev_dct, dct, 0x98, 0x0D08E000, 0); + /* 2. If DRAM Configuration Register[MemClkFreq] (F2x[1,0]94[2:0]) is + greater than or equal to 011b (DDR-800 and higher), + then write 00000080h to F2x[1,0]9C_xD02E001, + else write 00000090h to F2x[1,0]9C_xD02E001. */ +- if (pDCTstat->Speed >= 4) +- Set_NB32_index_wait(pDCTstat->dev_dct, 0x98 + dct * 0x100, 0xD02E001, 0x80); ++ if (pDCTstat->Speed >= mhz_to_memclk_config(mctGet_NVbits(NV_MIN_MEMCLK))) ++ Set_NB32_index_wait_DCT(pDCTstat->dev_dct, dct, 0x98, 0x0D02E001, 0x80); + else +- Set_NB32_index_wait(pDCTstat->dev_dct, 0x98 + dct * 0x100, 0xD02E001, 0x90); ++ Set_NB32_index_wait_DCT(pDCTstat->dev_dct, dct, 0x98, 0x0D02E001, 0x90); + } + return pDCTstat->ErrCode; + } +@@ -2455,9 +4081,9 @@ static u8 mct_PlatformSpec(struct MCTStatStruc *pMCTstat, + i_end = dct + 1; + } + for (i=i_start; i<i_end; i++) { +- index_reg = 0x98 + (i * 0x100); +- Set_NB32_index_wait(dev, index_reg, 0x00, pDCTstat->CH_ODC_CTL[i]); /* Channel A Output Driver Compensation Control */ +- Set_NB32_index_wait(dev, index_reg, 0x04, pDCTstat->CH_ADDR_TMG[i]); /* Channel A Output Driver Compensation Control */ ++ index_reg = 0x98; ++ Set_NB32_index_wait_DCT(dev, i, index_reg, 0x00, pDCTstat->CH_ODC_CTL[i]); /* Channel A Output Driver Compensation Control */ ++ Set_NB32_index_wait_DCT(dev, i, index_reg, 0x04, pDCTstat->CH_ADDR_TMG[i]); /* Channel A Output Driver Compensation Control */ + } + + return pDCTstat->ErrCode; +@@ -2511,14 +4137,14 @@ static u8 mct_SPDCalcWidth(struct MCTStatStruc *pMCTstat, + } + + if (pDCTstat->DIMMValidDCT[0] == 0) { +- val = Get_NB32(pDCTstat->dev_dct, 0x94); ++ val = Get_NB32_DCT(pDCTstat->dev_dct, 0, 0x94); + val |= 1 << DisDramInterface; +- Set_NB32(pDCTstat->dev_dct, 0x94, val); ++ Set_NB32_DCT(pDCTstat->dev_dct, 0, 0x94, val); + } + if (pDCTstat->DIMMValidDCT[1] == 0) { +- val = Get_NB32(pDCTstat->dev_dct, 0x94 + 0x100); ++ val = Get_NB32_DCT(pDCTstat->dev_dct, 1, 0x94); + val |= 1 << DisDramInterface; +- Set_NB32(pDCTstat->dev_dct, 0x94 + 0x100, val); ++ Set_NB32_DCT(pDCTstat->dev_dct, 1, 0x94, val); + } + + printk(BIOS_DEBUG, "SPDCalcWidth: Status %x\n", pDCTstat->Status); +@@ -2648,21 +4274,20 @@ static void Set_OtherTiming(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct) + { + u32 reg; +- u32 reg_off = 0x100 * dct; + u32 val; + u32 dword; + u32 dev = pDCTstat->dev_dct; + +- Get_DqsRcvEnGross_Diff(pDCTstat, dev, 0x98 + reg_off); +- Get_WrDatGross_Diff(pDCTstat, dct, dev, 0x98 + reg_off); ++ Get_DqsRcvEnGross_Diff(pDCTstat, dev, dct, 0x98); ++ Get_WrDatGross_Diff(pDCTstat, dct, dev, 0x98); + Get_Trdrd(pMCTstat, pDCTstat, dct); + Get_Twrwr(pMCTstat, pDCTstat, dct); + Get_Twrrd(pMCTstat, pDCTstat, dct); + Get_TrwtTO(pMCTstat, pDCTstat, dct); + Get_TrwtWB(pMCTstat, pDCTstat); + +- reg = 0x8C + reg_off; /* Dram Timing Hi */ +- val = Get_NB32(dev, reg); ++ reg = 0x8C; /* Dram Timing Hi */ ++ val = Get_NB32_DCT(dev, dct, reg); + val &= 0xffff0300; + dword = pDCTstat->TrwtTO; + val |= dword << 4; +@@ -2674,10 +4299,10 @@ static void Set_OtherTiming(struct MCTStatStruc *pMCTstat, + val |= dword << 14; + dword = pDCTstat->TrwtWB; + val |= dword; +- Set_NB32(dev, reg, val); ++ Set_NB32_DCT(dev, dct, reg, val); + +- reg = 0x78 + reg_off; +- val = Get_NB32(dev, reg); ++ reg = 0x78; ++ val = Get_NB32_DCT(dev, dct, reg); + val &= 0xFFFFC0FF; + dword = pDCTstat->Twrrd >> 2; + val 
|= dword << 8; +@@ -2685,7 +4310,7 @@ static void Set_OtherTiming(struct MCTStatStruc *pMCTstat, + val |= dword << 10; + dword = pDCTstat->Trdrd >> 2; + val |= dword << 12; +- Set_NB32(dev, reg, val); ++ Set_NB32_DCT(dev, dct, reg, val); + } + + static void Get_Trdrd(struct MCTStatStruc *pMCTstat, +@@ -2755,18 +4380,17 @@ static void Get_TrwtWB(struct MCTStatStruc *pMCTstat, + static u8 Get_Latency_Diff(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct) + { +- u32 reg_off = 0x100 * dct; + u32 dev = pDCTstat->dev_dct; + u32 val1, val2; + +- val1 = Get_NB32(dev, reg_off + 0x88) & 0xF; +- val2 = (Get_NB32(dev, reg_off + 0x84) >> 20) & 7; ++ val1 = Get_NB32_DCT(dev, dct, 0x88) & 0xF; ++ val2 = (Get_NB32_DCT(dev, dct, 0x84) >> 20) & 7; + + return val1 - val2; + } + + static void Get_DqsRcvEnGross_Diff(struct DCTStatStruc *pDCTstat, +- u32 dev, u32 index_reg) ++ u32 dev, uint8_t dct, u32 index_reg) + { + u8 Smallest, Largest; + u32 val; +@@ -2776,12 +4400,12 @@ static void Get_DqsRcvEnGross_Diff(struct DCTStatStruc *pDCTstat, + DqsRcvEnGrossDelay of any other DIMM is equal to the Critical + Gross Delay Difference (CGDD) */ + /* DqsRcvEn byte 1,0 */ +- val = Get_DqsRcvEnGross_MaxMin(pDCTstat, dev, index_reg, 0x10); ++ val = Get_DqsRcvEnGross_MaxMin(pDCTstat, dev, dct, index_reg, 0x10); + Largest = val & 0xFF; + Smallest = (val >> 8) & 0xFF; + + /* DqsRcvEn byte 3,2 */ +- val = Get_DqsRcvEnGross_MaxMin(pDCTstat, dev, index_reg, 0x11); ++ val = Get_DqsRcvEnGross_MaxMin(pDCTstat, dev, dct, index_reg, 0x11); + byte = val & 0xFF; + bytex = (val >> 8) & 0xFF; + if (bytex < Smallest) +@@ -2790,7 +4414,7 @@ static void Get_DqsRcvEnGross_Diff(struct DCTStatStruc *pDCTstat, + Largest = byte; + + /* DqsRcvEn byte 5,4 */ +- val = Get_DqsRcvEnGross_MaxMin(pDCTstat, dev, index_reg, 0x20); ++ val = Get_DqsRcvEnGross_MaxMin(pDCTstat, dev, dct, index_reg, 0x20); + byte = val & 0xFF; + bytex = (val >> 8) & 0xFF; + if (bytex < Smallest) +@@ -2799,7 +4423,7 @@ static void Get_DqsRcvEnGross_Diff(struct DCTStatStruc *pDCTstat, + Largest = byte; + + /* DqsRcvEn byte 7,6 */ +- val = Get_DqsRcvEnGross_MaxMin(pDCTstat, dev, index_reg, 0x21); ++ val = Get_DqsRcvEnGross_MaxMin(pDCTstat, dev, dct, index_reg, 0x21); + byte = val & 0xFF; + bytex = (val >> 8) & 0xFF; + if (bytex < Smallest) +@@ -2809,7 +4433,7 @@ static void Get_DqsRcvEnGross_Diff(struct DCTStatStruc *pDCTstat, + + if (pDCTstat->DimmECCPresent> 0) { + /*DqsRcvEn Ecc */ +- val = Get_DqsRcvEnGross_MaxMin(pDCTstat, dev, index_reg, 0x12); ++ val = Get_DqsRcvEnGross_MaxMin(pDCTstat, dev, dct, index_reg, 0x12); + byte = val & 0xFF; + bytex = (val >> 8) & 0xFF; + if (bytex < Smallest) +@@ -2873,7 +4497,7 @@ static void Get_WrDatGross_Diff(struct DCTStatStruc *pDCTstat, + } + + static u16 Get_DqsRcvEnGross_MaxMin(struct DCTStatStruc *pDCTstat, +- u32 dev, u32 index_reg, ++ u32 dev, uint8_t dct, u32 index_reg, + u32 index) + { + u8 Smallest, Largest; +@@ -2891,7 +4515,7 @@ static u16 Get_DqsRcvEnGross_MaxMin(struct DCTStatStruc *pDCTstat, + + for (i=0; i < 8; i+=2) { + if ( pDCTstat->DIMMValid & (1 << i)) { +- val = Get_NB32_index_wait(dev, index_reg, index); ++ val = Get_NB32_index_wait_DCT(dev, dct, index_reg, index); + val &= 0x00E000E0; + byte = (val >> 5) & 0xFF; + if (byte < Smallest) +@@ -2929,7 +4553,7 @@ static u16 Get_WrDatGross_MaxMin(struct DCTStatStruc *pDCTstat, + Smallest = 3; + Largest = 0; + for (i=0; i < 2; i++) { +- val = Get_NB32_index_wait(dev, index_reg, index); ++ val = Get_NB32_index_wait_DCT(dev, dct, index_reg, index); + 
val &= 0x60606060; + val >>= 5; + for (j=0; j < 4; j++) { +@@ -2945,7 +4569,7 @@ static u16 Get_WrDatGross_MaxMin(struct DCTStatStruc *pDCTstat, + + if (pDCTstat->DimmECCPresent > 0) { + index++; +- val = Get_NB32_index_wait(dev, index_reg, index); ++ val = Get_NB32_index_wait_DCT(dev, dct, index_reg, index); + val &= 0x00000060; + val >>= 5; + byte = val & 0xFF; +@@ -2965,25 +4589,30 @@ static u16 Get_WrDatGross_MaxMin(struct DCTStatStruc *pDCTstat, + static void mct_PhyController_Config(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct) + { +- u32 index_reg = 0x98 + 0x100 * dct; ++ uint8_t index; ++ uint32_t dword; ++ u32 index_reg = 0x98; + u32 dev = pDCTstat->dev_dct; +- u32 val; + +- if (pDCTstat->LogicalCPUID & (AMD_DR_DAC2_OR_C3 | AMD_RB_C3)) { ++ if (pDCTstat->LogicalCPUID & (AMD_DR_DAC2_OR_C3 | AMD_RB_C3 | AMD_FAM15_ALL)) { + if (pDCTstat->Dimmx4Present == 0) { +- /* Set bit7 RxDqsUDllPowerDown to register F2x[1, 0]98_x0D0F0F13 for power saving */ +- val = Get_NB32_index_wait(dev, index_reg, 0x0D0F0F13); /* Agesa v3 v6 might be wrong here. */ +- val |= 1 << 7; /* BIOS should set this bit when x4 DIMMs are not present */ +- Set_NB32_index_wait(dev, index_reg, 0x0D0F0F13, val); ++ /* Set bit7 RxDqsUDllPowerDown to register F2x[1, 0]98_x0D0F0F13 for ++ * additional power saving when x4 DIMMs are not present. ++ */ ++ for (index = 0; index < 0x9; index++) { ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0013 | (index << 8)); ++ dword |= (0x1 << 7); /* RxDqsUDllPowerDown = 1 */ ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0013 | (index << 8), dword); ++ } + } + } + +- if (pDCTstat->LogicalCPUID & AMD_DR_DAC2_OR_C3) { ++ if (pDCTstat->LogicalCPUID & (AMD_DR_DAC2_OR_C3 | AMD_FAM15_ALL)) { + if (pDCTstat->DimmECCPresent == 0) { + /* Set bit4 PwrDn to register F2x[1, 0]98_x0D0F0830 for power saving */ +- val = Get_NB32_index_wait(dev, index_reg, 0x0D0F0830); +- val |= 1 << 4; /* BIOS should set this bit if ECC DIMMs are not present */ +- Set_NB32_index_wait(dev, index_reg, 0x0D0F0830, val); ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0830); ++ dword |= 1 << 4; /* BIOS should set this bit if ECC DIMMs are not present */ ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0830, dword); + } + } + +@@ -3024,21 +4653,61 @@ static void mct_FinalMCT_D(struct MCTStatStruc *pMCTstat, + val &= ~(1 << 12); + + val &= 0x0FFFFFFF; +- switch (pDCTstat->Speed) { +- case 4: +- val |= 0x50000000; /* 5 for DDR800 */ +- break; +- case 5: +- val |= 0x60000000; /* 6 for DDR1066 */ +- break; +- case 6: +- val |= 0x80000000; /* 8 for DDR800 */ +- break; +- default: +- val |= 0x90000000; /* 9 for DDR1600 */ +- break; ++ if (!is_fam15h()) { ++ switch (pDCTstat->Speed) { ++ case 4: ++ val |= 0x50000000; /* 5 for DDR800 */ ++ break; ++ case 5: ++ val |= 0x60000000; /* 6 for DDR1066 */ ++ break; ++ case 6: ++ val |= 0x80000000; /* 8 for DDR800 */ ++ break; ++ default: ++ val |= 0x90000000; /* 9 for DDR1600 */ ++ break; ++ } + } + Set_NB32(pDCTstat->dev_dct, 0x1B0, val); ++ ++ if (is_fam15h()) { ++ uint8_t wm1; ++ uint8_t wm2; ++ ++ switch (pDCTstat->Speed) { ++ case 0x4: ++ wm1 = 0x3; ++ wm2 = 0x4; ++ break; ++ case 0x6: ++ wm1 = 0x3; ++ wm2 = 0x5; ++ break; ++ case 0xa: ++ wm1 = 0x4; ++ wm2 = 0x6; ++ break; ++ case 0xe: ++ wm1 = 0x5; ++ wm2 = 0x8; ++ break; ++ case 0x12: ++ wm1 = 0x6; ++ wm2 = 0x9; ++ break; ++ default: ++ wm1 = 0x7; ++ wm2 = 0xa; ++ break; ++ } ++ ++ val = Get_NB32(pDCTstat->dev_dct, 0x1B4); ++ val &= ~(0x3ff); ++ val |= ((wm2 & 0x1f) 
<< 5); ++ val |= (wm1 & 0x1f); ++ Set_NB32(pDCTstat->dev_dct, 0x1B4, val); ++ } + } + } + +@@ -3055,16 +4724,103 @@ static void mct_FinalMCT_D(struct MCTStatStruc *pMCTstat, + } + } + ++void mct_ForceNBPState0_En_Fam15(struct MCTStatStruc *pMCTstat, ++ struct DCTStatStruc *pDCTstat) ++{ ++ /* Force the NB P-state to P0 */ ++ uint32_t dword; ++ uint32_t dword2; ++ ++ dword = Get_NB32(pDCTstat->dev_nbctl, 0x174); ++ if (!(dword & 0x1)) { ++ dword = Get_NB32(pDCTstat->dev_nbctl, 0x170); ++ pDCTstat->SwNbPstateLoDis = (dword >> 14) & 0x1; ++ pDCTstat->NbPstateDisOnP0 = (dword >> 13) & 0x1; ++ pDCTstat->NbPstateThreshold = (dword >> 9) & 0x7; ++ pDCTstat->NbPstateHi = (dword >> 6) & 0x3; ++ dword &= ~(0x1 << 14); /* SwNbPstateLoDis = 0 */ ++ dword &= ~(0x1 << 13); /* NbPstateDisOnP0 = 0 */ ++ dword &= ~(0x7 << 9); /* NbPstateThreshold = 0 */ ++ dword &= ~(0x3 << 3); /* NbPstateLo = NbPstateMaxVal */ ++ dword |= ((dword & 0x3) << 3); ++ Set_NB32(pDCTstat->dev_nbctl, 0x170, dword); ++ ++ /* Wait until CurNbPState == NbPstateLo */ ++ do { ++ dword2 = Get_NB32(pDCTstat->dev_nbctl, 0x174); ++ } while (((dword2 << 19) & 0x7) != (dword & 0x3)); ++ ++ dword = Get_NB32(pDCTstat->dev_nbctl, 0x170); ++ dword &= ~(0x3 << 6); /* NbPstateHi = 0 */ ++ dword |= (0x3 << 14); /* SwNbPstateLoDis = 1 */ ++ Set_NB32(pDCTstat->dev_nbctl, 0x170, dword); ++ ++ /* Wait until CurNbPState == 0 */ ++ do { ++ dword2 = Get_NB32(pDCTstat->dev_nbctl, 0x174); ++ } while (((dword2 << 19) & 0x7) != 0); ++ } ++} ++ ++void mct_ForceNBPState0_Dis_Fam15(struct MCTStatStruc *pMCTstat, ++ struct DCTStatStruc *pDCTstat) ++{ ++ /* Restore normal NB P-state functionailty */ ++ uint32_t dword; ++ ++ dword = Get_NB32(pDCTstat->dev_nbctl, 0x174); ++ if (!(dword & 0x1)) { ++ dword = Get_NB32(pDCTstat->dev_nbctl, 0x170); ++ dword &= ~(0x1 << 14); /* SwNbPstateLoDis*/ ++ dword |= ((pDCTstat->SwNbPstateLoDis & 0x1) << 14); ++ dword &= ~(0x1 << 13); /* NbPstateDisOnP0 */ ++ dword |= ((pDCTstat->NbPstateDisOnP0 & 0x1) << 13); ++ dword &= ~(0x7 << 9); /* NbPstateThreshold */ ++ dword |= ((pDCTstat->NbPstateThreshold & 0x7) << 9); ++ dword &= ~(0x3 << 6); /* NbPstateHi */ ++ dword |= ((pDCTstat->NbPstateHi & 0x3) << 3); ++ Set_NB32(pDCTstat->dev_nbctl, 0x170, dword); ++ } ++} ++ + static void mct_InitialMCT_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat) + { +- mct_SetClToNB_D(pMCTstat, pDCTstat); +- mct_SetWbEnhWsbDis_D(pMCTstat, pDCTstat); ++ if (is_fam15h()) { ++ msr_t p0_state_msr; ++ uint8_t cpu_fid; ++ uint8_t cpu_did; ++ uint32_t cpu_divisor; ++ uint8_t boost_states; ++ ++ /* Retrieve the number of boost states */ ++ boost_states = (Get_NB32(pDCTstat->dev_link, 0x15c) >> 2) & 0x7; ++ ++ /* Retrieve and store the TSC frequency (P0 COF) */ ++ p0_state_msr = rdmsr(0xc0010064 + boost_states); ++ cpu_fid = p0_state_msr.lo & 0x3f; ++ cpu_did = (p0_state_msr.lo >> 6) & 0x7; ++ cpu_divisor = (0x1 << cpu_did); ++ pMCTstat->TSCFreq = (100 * (cpu_fid + 0x10)) / cpu_divisor; ++ ++ mct_ForceNBPState0_En_Fam15(pMCTstat, pDCTstat); ++ } else { ++ /* K10 BKDG v3.62 section 2.8.9.2 */ ++ printk(BIOS_DEBUG, "mct_InitialMCT_D: clear_legacy_Mode\n"); ++ clear_legacy_Mode(pMCTstat, pDCTstat); ++ ++ /* Northbridge configuration */ ++ mct_SetClToNB_D(pMCTstat, pDCTstat); ++ mct_SetWbEnhWsbDis_D(pMCTstat, pDCTstat); ++ } + } + + static u32 mct_NodePresent_D(void) + { + u32 val; +- val = 0x12001022; ++ if (is_fam15h()) ++ val = 0x16001022; ++ else ++ val = 0x12001022; + return val; + } + +@@ -3097,14 +4853,13 @@ static void clear_legacy_Mode(struct 
MCTStatStruc *pMCTstat, + + /* Clear Legacy BIOS Mode bit */ + reg = 0x94; +- val = Get_NB32(dev, reg); ++ val = Get_NB32_DCT(dev, 0, reg); + val &= ~(1<<LegacyBiosMode); +- Set_NB32(dev, reg, val); ++ Set_NB32_DCT(dev, 0, reg, val); + +- reg = 0x94 + 0x100; +- val = Get_NB32(dev, reg); ++ val = Get_NB32_DCT(dev, 1, reg); + val &= ~(1<<LegacyBiosMode); +- Set_NB32(dev, reg, val); ++ Set_NB32_DCT(dev, 1, reg, val); + } + + static void mct_HTMemMapExt(struct MCTStatStruc *pMCTstat, +@@ -3171,7 +4926,7 @@ static void SetCSTriState(struct MCTStatStruc *pMCTstat, + { + u32 val; + u32 dev = pDCTstat->dev_dct; +- u32 index_reg = 0x98 + 0x100 * dct; ++ u32 index_reg = 0x98; + u32 index; + u16 word; + +@@ -3186,9 +4941,9 @@ static void SetCSTriState(struct MCTStatStruc *pMCTstat, + } + word = (~word) & 0xFF; + index = 0x0c; +- val = Get_NB32_index_wait(dev, index_reg, index); ++ val = Get_NB32_index_wait_DCT(dev, dct, index_reg, index); + val |= word; +- Set_NB32_index_wait(dev, index_reg, index, val); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, index, val); + } + + static void SetCKETriState(struct MCTStatStruc *pMCTstat, +@@ -3196,7 +4951,7 @@ static void SetCKETriState(struct MCTStatStruc *pMCTstat, + { + u32 val; + u32 dev; +- u32 index_reg = 0x98 + 0x100 * dct; ++ u32 index_reg = 0x98; + u32 index; + u16 word; + +@@ -3208,14 +4963,14 @@ static void SetCKETriState(struct MCTStatStruc *pMCTstat, + word = pDCTstat->CSPresent; + + index = 0x0c; +- val = Get_NB32_index_wait(dev, index_reg, index); ++ val = Get_NB32_index_wait_DCT(dev, dct, index_reg, index); + if ((word & 0x55) == 0) + val |= 1 << 12; + + if ((word & 0xAA) == 0) + val |= 1 << 13; + +- Set_NB32_index_wait(dev, index_reg, index, val); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, index, val); + } + + static void SetODTTriState(struct MCTStatStruc *pMCTstat, +@@ -3223,7 +4978,7 @@ static void SetODTTriState(struct MCTStatStruc *pMCTstat, + { + u32 val; + u32 dev; +- u32 index_reg = 0x98 + 0x100 * dct; ++ u32 index_reg = 0x98; + u8 cs; + u32 index; + u8 odt; +@@ -3257,86 +5012,281 @@ static void SetODTTriState(struct MCTStatStruc *pMCTstat, + } + + index = 0x0C; +- val = Get_NB32_index_wait(dev, index_reg, index); ++ val = Get_NB32_index_wait_DCT(dev, dct, index_reg, index); + val |= ((odt & 0xFF) << 8); /* set bits 11:8 ODTTriState[3:0] */ +- Set_NB32_index_wait(dev, index_reg, index, val); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, index, val); ++ ++} ++ ++/* Family 15h */ ++static void InitDDRPhy(struct MCTStatStruc *pMCTstat, ++ struct DCTStatStruc *pDCTstat, u8 dct) ++{ ++ uint8_t index; ++ uint32_t dword; ++ uint8_t ddr_voltage_index; ++ uint8_t amd_voltage_level_index = 0; ++ uint32_t index_reg = 0x98; ++ uint32_t dev = pDCTstat->dev_dct; ++ ++ printk(BIOS_DEBUG, "%s: Start\n", __func__); + ++ /* Find current DDR supply voltage for this DCT */ ++ ddr_voltage_index = dct_ddr_voltage_index(pDCTstat, dct); ++ ++ /* Fam15h BKDG v3.14 section 2.10.5.3 ++ * The remainder of the Phy Initialization algorithm picks up in phyAssistedMemFnceTraining ++ */ ++ for (dct = 0; dct < 2; dct++) { ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000b, 0x80000000); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fe013, 0x00000118); ++ ++ /* Program desired VDDIO level */ ++ if (ddr_voltage_index & 0x4) { ++ /* 1.25V */ ++ amd_voltage_level_index = 0x2; ++ } else if (ddr_voltage_index & 0x2) { ++ /* 1.35V */ ++ amd_voltage_level_index = 0x1; ++ } else if (ddr_voltage_index & 0x1) { ++ /* 1.50V */ ++ amd_voltage_level_index = 
0x0; ++ } ++ ++ /* D18F2x9C_x0D0F_0[F,8:0]1F_dct[1:0][RxVioLvl] */ ++ for (index = 0; index < 0x9; index++) { ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f001f | (index << 8)); ++ dword &= ~(0x3 << 3); ++ dword |= (amd_voltage_level_index << 3); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f001f | (index << 8), dword); ++ } ++ ++ /* D18F2x9C_x0D0F_[C,8,2][2:0]1F_dct[1:0][RxVioLvl] */ ++ for (index = 0; index < 0x3; index++) { ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f201f | (index << 8)); ++ dword &= ~(0x3 << 3); ++ dword |= (amd_voltage_level_index << 3); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f201f | (index << 8), dword); ++ } ++ for (index = 0; index < 0x2; index++) { ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f801f | (index << 8)); ++ dword &= ~(0x3 << 3); ++ dword |= (amd_voltage_level_index << 3); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f801f | (index << 8), dword); ++ } ++ for (index = 0; index < 0x1; index++) { ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fc01f | (index << 8)); ++ dword &= ~(0x3 << 3); ++ dword |= (amd_voltage_level_index << 3); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fc01f | (index << 8), dword); ++ } ++ ++ /* D18F2x9C_x0D0F_4009_dct[1:0][CmpVioLvl, ComparatorAdjust] */ ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f4009); ++ dword &= ~(0x0000c00c); ++ dword |= (amd_voltage_level_index << 14); ++ dword |= (amd_voltage_level_index << 2); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f4009, dword); ++ } ++ ++ printk(BIOS_DEBUG, "%s: Done\n", __func__); + } + + static void InitPhyCompensation(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct) + { + u8 i; +- u32 index_reg = 0x98 + 0x100 * dct; ++ u32 index_reg = 0x98; + u32 dev = pDCTstat->dev_dct; +- u32 val; + u32 valx = 0; +- u32 dword; ++ uint8_t index; ++ uint32_t dword; + const u8 *p; + +- val = Get_NB32_index_wait(dev, index_reg, 0x00); +- dword = 0; +- for (i=0; i < 6; i++) { +- switch (i) { +- case 0: +- case 4: +- p = Table_Comp_Rise_Slew_15x; +- valx = p[(val >> 16) & 3]; +- break; +- case 1: +- case 5: +- p = Table_Comp_Fall_Slew_15x; +- valx = p[(val >> 16) & 3]; +- break; +- case 2: +- p = Table_Comp_Rise_Slew_20x; +- valx = p[(val >> 8) & 3]; +- break; +- case 3: +- p = Table_Comp_Fall_Slew_20x; +- valx = p[(val >> 8) & 3]; +- break; ++ printk(BIOS_DEBUG, "%s: Start\n", __func__); ++ ++ if (is_fam15h()) { ++ /* Algorithm detailed in the Fam15h BKDG Rev. 
3.14 section 2.10.5.3.4 */ ++ uint32_t tx_pre; ++ uint32_t drive_strength; ++ ++ /* Program D18F2x9C_x0D0F_E003_dct[1:0][DisAutoComp, DisablePredriverCal] */ ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fe003); ++ dword |= (0x3 << 13); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fe003, dword); ++ ++ /* Determine TxPreP/TxPreN for data lanes (Stage 1) */ ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000000); ++ drive_strength = (dword >> 20) & 0x7; /* DqsDrvStren */ ++ tx_pre = fam15h_phy_predriver_calibration_code(pDCTstat, dct, drive_strength); ++ ++ /* Program TxPreP/TxPreN for data lanes (Stage 1) */ ++ for (index = 0; index < 0x9; index++) { ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0006 | (index << 8)); ++ dword &= ~(0xfff); ++ dword |= tx_pre; ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0006 | (index << 8), dword); ++ } + ++ /* Determine TxPreP/TxPreN for data lanes (Stage 2) */ ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000000); ++ drive_strength = (dword >> 16) & 0x7; /* DataDrvStren */ ++ tx_pre = fam15h_phy_predriver_calibration_code(pDCTstat, dct, drive_strength); ++ ++ /* Program TxPreP/TxPreN for data lanes (Stage 2) */ ++ for (index = 0; index < 0x9; index++) { ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f000a | (index << 8)); ++ dword &= ~(0xfff); ++ dword |= tx_pre; ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f000a | (index << 8), dword); ++ } ++ for (index = 0; index < 0x9; index++) { ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0002 | (index << 8)); ++ dword &= ~(0xfff); ++ dword |= (0x8000 | tx_pre); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0002 | (index << 8), dword); + } +- dword |= valx << (5 * i); +- } + +- /* Override/Exception */ +- if (!pDCTstat->GangedMode) { +- i = 0; /* use i for the dct setting required */ +- if (pDCTstat->MAdimms[0] < 4) +- i = 1; +- if (((pDCTstat->Speed == 2) || (pDCTstat->Speed == 3)) && (pDCTstat->MAdimms[i] == 4)) { +- dword &= 0xF18FFF18; +- index_reg = 0x98; /* force dct = 0 */ ++ /* Determine TxPreP/TxPreN for command/address lines (Stage 1) */ ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000000); ++ drive_strength = (dword >> 4) & 0x7; /* CsOdtDrvStren */ ++ tx_pre = fam15h_phy_predriver_cmd_addr_calibration_code(pDCTstat, dct, drive_strength); ++ ++ /* Program TxPreP/TxPreN for command/address lines (Stage 1) */ ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f8006); ++ dword &= ~(0xfff); ++ dword |= tx_pre; ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f8006, dword); ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f800a); ++ dword &= ~(0xfff); ++ dword |= tx_pre; ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f800a, dword); ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f8002); ++ dword &= ~(0xfff); ++ dword |= (0x8000 | tx_pre); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f8002, dword); ++ ++ /* Determine TxPreP/TxPreN for command/address lines (Stage 2) */ ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000000); ++ drive_strength = (dword >> 8) & 0x7; /* AddrCmdDrvStren */ ++ tx_pre = fam15h_phy_predriver_cmd_addr_calibration_code(pDCTstat, dct, drive_strength); ++ ++ /* Program TxPreP/TxPreN for command/address lines (Stage 2) */ ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f8106); ++ dword &= ~(0xfff); ++ dword |= tx_pre; ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f8106, 
dword); ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f810a); ++ dword &= ~(0xfff); ++ dword |= tx_pre; ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f810a, dword); ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fc006); ++ dword &= ~(0xfff); ++ dword |= tx_pre; ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fc006, dword); ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fc00a); ++ dword &= ~(0xfff); ++ dword |= tx_pre; ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fc00a, dword); ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fc00e); ++ dword &= ~(0xfff); ++ dword |= tx_pre; ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fc00e, dword); ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fc012); ++ dword &= ~(0xfff); ++ dword |= tx_pre; ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fc012, dword); ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f8102); ++ dword &= ~(0xfff); ++ dword |= (0x8000 | tx_pre); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f8102, dword); ++ ++ /* Determine TxPreP/TxPreN for command/address lines (Stage 3) */ ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000000); ++ drive_strength = (dword >> 0) & 0x7; /* CkeDrvStren */ ++ tx_pre = fam15h_phy_predriver_cmd_addr_calibration_code(pDCTstat, dct, drive_strength); ++ ++ /* Program TxPreP/TxPreN for command/address lines (Stage 3) */ ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fc002); ++ dword &= ~(0xfff); ++ dword |= (0x8000 | tx_pre); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fc002, dword); ++ ++ /* Determine TxPreP/TxPreN for clock lines */ ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000000); ++ drive_strength = (dword >> 12) & 0x7; /* ClkDrvStren */ ++ tx_pre = fam15h_phy_predriver_clk_calibration_code(pDCTstat, dct, drive_strength); ++ ++ /* Program TxPreP/TxPreN for clock lines */ ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f2002); ++ dword &= ~(0xfff); ++ dword |= (0x8000 | tx_pre); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f2002, dword); ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f2102); ++ dword &= ~(0xfff); ++ dword |= (0x8000 | tx_pre); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f2102, dword); ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f2202); ++ dword &= ~(0xfff); ++ dword |= (0x8000 | tx_pre); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f2202, dword); ++ } else { ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x00); ++ dword = 0; ++ for (i=0; i < 6; i++) { ++ switch (i) { ++ case 0: ++ case 4: ++ p = Table_Comp_Rise_Slew_15x; ++ valx = p[(dword >> 16) & 3]; ++ break; ++ case 1: ++ case 5: ++ p = Table_Comp_Fall_Slew_15x; ++ valx = p[(dword >> 16) & 3]; ++ break; ++ case 2: ++ p = Table_Comp_Rise_Slew_20x; ++ valx = p[(dword >> 8) & 3]; ++ break; ++ case 3: ++ p = Table_Comp_Fall_Slew_20x; ++ valx = p[(dword >> 8) & 3]; ++ break; ++ } ++ dword |= valx << (5 * i); + } ++ ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0a, dword); + } + +- Set_NB32_index_wait(dev, index_reg, 0x0a, dword); ++ printk(BIOS_DEBUG, "%s: Done\n", __func__); + } + + static void mct_EarlyArbEn_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct) + { +- u32 reg; +- u32 val; +- u32 dev = pDCTstat->dev_dct; +- +- /* GhEnhancement #18429 modified by askar: For low NB CLK : +- * Memclk ratio, the DCT may need to arbitrate early to avoid +- * unnecessary 
bubbles. +- * bit 19 of F2x[1,0]78 Dram Control Register, set this bit only when +- * NB CLK : Memclk ratio is between 3:1 (inclusive) to 4:5 (inclusive) +- */ +- reg = 0x78 + 0x100 * dct; +- val = Get_NB32(dev, reg); +- +- if (pDCTstat->LogicalCPUID & (AMD_DR_Cx | AMD_DR_Dx)) +- val |= (1 << EarlyArbEn); +- else if (CheckNBCOFEarlyArbEn(pMCTstat, pDCTstat)) +- val |= (1 << EarlyArbEn); +- +- Set_NB32(dev, reg, val); ++ if (!is_fam15h()) { ++ u32 reg; ++ u32 val; ++ u32 dev = pDCTstat->dev_dct; ++ ++ /* GhEnhancement #18429 modified by askar: For low NB CLK : ++ * Memclk ratio, the DCT may need to arbitrate early to avoid ++ * unnecessary bubbles. ++ * bit 19 of F2x[1,0]78 Dram Control Register, set this bit only when ++ * NB CLK : Memclk ratio is between 3:1 (inclusive) to 4:5 (inclusive) ++ */ ++ reg = 0x78; ++ val = Get_NB32_DCT(dev, dct, reg); ++ ++ if (pDCTstat->LogicalCPUID & (AMD_DR_Cx | AMD_DR_Dx)) ++ val |= (1 << EarlyArbEn); ++ else if (CheckNBCOFEarlyArbEn(pMCTstat, pDCTstat)) ++ val |= (1 << EarlyArbEn); ++ ++ Set_NB32_DCT(dev, dct, reg, val); ++ } + } + + static u8 CheckNBCOFEarlyArbEn(struct MCTStatStruc *pMCTstat, +@@ -3359,9 +5309,9 @@ static u8 CheckNBCOFEarlyArbEn(struct MCTStatStruc *pMCTstat, + NbDid |= 1; + + reg = 0x94; +- val = Get_NB32(dev, reg); ++ val = Get_NB32_DCT(dev, 0, reg); + if (!(val & (1 << MemClkFreqVal))) +- val = Get_NB32(dev, reg + 0x100); /* get the DCT1 value */ ++ val = Get_NB32_DCT(dev, 1, reg); /* get the DCT1 value */ + + val &= 0x07; + val += 3; +@@ -3430,28 +5380,204 @@ static void mct_ResetDataStruct_D(struct MCTStatStruc *pMCTstat, + } + + static void mct_BeforeDramInit_Prod_D(struct MCTStatStruc *pMCTstat, +- struct DCTStatStruc *pDCTstat) ++ struct DCTStatStruc *pDCTstat, u8 dct) ++{ ++ mct_ProgramODT_D(pMCTstat, pDCTstat, dct); ++} ++ ++static void mct_ProgramODT_D(struct MCTStatStruc *pMCTstat, ++ struct DCTStatStruc *pDCTstat, u8 dct) + { + u8 i; +- u32 reg_off, dword; ++ u32 dword; + u32 dev = pDCTstat->dev_dct; + +- if (pDCTstat->LogicalCPUID & AMD_DR_Dx) { ++ /* FIXME ++ * Mainboards need to be able to specify the maximum number of DIMMs installable per channel ++ * For now assume a maximum of 2 DIMMs per channel can be installed ++ */ ++ uint8_t MaxDimmsInstallable = 2; ++ ++ if (is_fam15h()) { ++ /* Obtain number of DIMMs on channel */ ++ uint8_t dimm_count = pDCTstat->MAdimms[dct]; ++ uint8_t rank_count_dimm0; ++ uint8_t rank_count_dimm1; ++ uint32_t odt_pattern_0; ++ uint32_t odt_pattern_1; ++ uint32_t odt_pattern_2; ++ uint32_t odt_pattern_3; ++ uint8_t write_odt_duration; ++ uint8_t read_odt_duration; ++ uint8_t write_odt_delay; ++ uint8_t read_odt_delay; ++ ++ /* Select appropriate ODT pattern for installed DIMMs ++ * Refer to the Fam15h BKDG Rev. 
3.14, page 149 onwards ++ */ ++ if (pDCTstat->C_DCTPtr[dct]->Status[DCT_STATUS_REGISTERED]) { ++ if (MaxDimmsInstallable == 2) { ++ if (dimm_count == 1) { ++ /* 1 DIMM detected */ ++ rank_count_dimm1 = pDCTstat->C_DCTPtr[dct]->DimmRanks[1]; ++ if (rank_count_dimm1 == 1) { ++ odt_pattern_0 = 0x00000000; ++ odt_pattern_1 = 0x00000000; ++ odt_pattern_2 = 0x00000000; ++ odt_pattern_3 = 0x00020000; ++ } else if (rank_count_dimm1 == 2) { ++ odt_pattern_0 = 0x00000000; ++ odt_pattern_1 = 0x00000000; ++ odt_pattern_2 = 0x00000000; ++ odt_pattern_3 = 0x08020000; ++ } else if (rank_count_dimm1 == 4) { ++ odt_pattern_0 = 0x00000000; ++ odt_pattern_1 = 0x00000000; ++ odt_pattern_2 = 0x020a0000; ++ odt_pattern_3 = 0x080a0000; ++ } else { ++ /* Fallback */ ++ odt_pattern_0 = 0x00000000; ++ odt_pattern_1 = 0x00000000; ++ odt_pattern_2 = 0x00000000; ++ odt_pattern_3 = 0x08020000; ++ } ++ } else { ++ /* 2 DIMMs detected */ ++ rank_count_dimm0 = pDCTstat->C_DCTPtr[dct]->DimmRanks[0]; ++ rank_count_dimm1 = pDCTstat->C_DCTPtr[dct]->DimmRanks[1]; ++ if ((rank_count_dimm0 < 4) && (rank_count_dimm1 < 4)) { ++ odt_pattern_0 = 0x00000000; ++ odt_pattern_1 = 0x01010202; ++ odt_pattern_2 = 0x00000000; ++ odt_pattern_3 = 0x09030603; ++ } else if ((rank_count_dimm0 < 4) && (rank_count_dimm1 == 4)) { ++ odt_pattern_0 = 0x01010000; ++ odt_pattern_1 = 0x01010a0a; ++ odt_pattern_2 = 0x01090000; ++ odt_pattern_3 = 0x01030e0b; ++ } else if ((rank_count_dimm0 == 4) && (rank_count_dimm1 < 4)) { ++ odt_pattern_0 = 0x00000202; ++ odt_pattern_1 = 0x05050202; ++ odt_pattern_2 = 0x00000206; ++ odt_pattern_3 = 0x0d070203; ++ } else if ((rank_count_dimm0 == 4) && (rank_count_dimm1 == 4)) { ++ odt_pattern_0 = 0x05050a0a; ++ odt_pattern_1 = 0x05050a0a; ++ odt_pattern_2 = 0x050d0a0e; ++ odt_pattern_3 = 0x05070a0b; ++ } else { ++ /* Fallback */ ++ odt_pattern_0 = 0x00000000; ++ odt_pattern_1 = 0x00000000; ++ odt_pattern_2 = 0x00000000; ++ odt_pattern_3 = 0x00000000; ++ } ++ } ++ } else { ++ /* FIXME ++ * 3 DIMMs per channel UNIMPLEMENTED ++ */ ++ odt_pattern_0 = 0x00000000; ++ odt_pattern_1 = 0x00000000; ++ odt_pattern_2 = 0x00000000; ++ odt_pattern_3 = 0x00000000; ++ } ++ } else if (pDCTstat->C_DCTPtr[dct]->Status[DCT_STATUS_LOAD_REDUCED]) { ++ /* TODO ++ * Load reduced dimms UNIMPLEMENTED ++ */ ++ odt_pattern_0 = 0x00000000; ++ odt_pattern_1 = 0x00000000; ++ odt_pattern_2 = 0x00000000; ++ odt_pattern_3 = 0x00000000; ++ } else { ++ if (MaxDimmsInstallable == 2) { ++ if (dimm_count == 1) { ++ /* 1 DIMM detected */ ++ rank_count_dimm1 = pDCTstat->C_DCTPtr[dct]->DimmRanks[1]; ++ if (rank_count_dimm1 == 1) { ++ odt_pattern_0 = 0x00000000; ++ odt_pattern_1 = 0x00000000; ++ odt_pattern_2 = 0x00000000; ++ odt_pattern_3 = 0x00020000; ++ } else if (rank_count_dimm1 == 2) { ++ odt_pattern_0 = 0x00000000; ++ odt_pattern_1 = 0x00000000; ++ odt_pattern_2 = 0x00000000; ++ odt_pattern_3 = 0x08020000; ++ } else { ++ /* Fallback */ ++ odt_pattern_0 = 0x00000000; ++ odt_pattern_1 = 0x00000000; ++ odt_pattern_2 = 0x00000000; ++ odt_pattern_3 = 0x08020000; ++ } ++ } else { ++ /* 2 DIMMs detected */ ++ odt_pattern_0 = 0x00000000; ++ odt_pattern_1 = 0x01010202; ++ odt_pattern_2 = 0x00000000; ++ odt_pattern_3 = 0x09030603; ++ } ++ } else { ++ /* FIXME ++ * 3 DIMMs per channel UNIMPLEMENTED ++ */ ++ odt_pattern_0 = 0x00000000; ++ odt_pattern_1 = 0x00000000; ++ odt_pattern_2 = 0x00000000; ++ odt_pattern_3 = 0x00000000; ++ } ++ } ++ ++ if (pDCTstat->C_DCTPtr[dct]->Status[DCT_STATUS_LOAD_REDUCED]) { ++ /* TODO ++ * Load reduced dimms UNIMPLEMENTED ++ */ ++ 
write_odt_duration = 0x0; ++ read_odt_duration = 0x0; ++ write_odt_delay = 0x0; ++ read_odt_delay = 0x0; ++ } else { ++ uint8_t tcl; ++ uint8_t tcwl; ++ tcl = Get_NB32_DCT(dev, dct, 0x200) & 0x1f; ++ tcwl = Get_NB32_DCT(dev, dct, 0x20c) & 0x1f; ++ ++ write_odt_duration = 0x6; ++ read_odt_duration = 0x6; ++ write_odt_delay = 0x0; ++ if (tcl > tcwl) ++ read_odt_delay = tcl - tcwl; ++ else ++ read_odt_delay = 0x0; ++ } ++ ++ /* Program ODT pattern */ ++ Set_NB32_DCT(dev, dct, 0x230, odt_pattern_1); ++ Set_NB32_DCT(dev, dct, 0x234, odt_pattern_0); ++ Set_NB32_DCT(dev, dct, 0x238, odt_pattern_3); ++ Set_NB32_DCT(dev, dct, 0x23c, odt_pattern_2); ++ dword = Get_NB32_DCT(dev, dct, 0x240); ++ dword &= ~(0x7 << 12); /* WrOdtOnDuration = write_odt_duration */ ++ dword |= (write_odt_duration & 0x7) << 12; ++ dword &= ~(0x7 << 8); /* WrOdtTrnOnDly = write_odt_delay */ ++ dword |= (write_odt_delay & 0x7) << 8; ++ dword &= ~(0xf << 4); /* RdOdtOnDuration = read_odt_duration */ ++ dword |= (read_odt_duration & 0xf) << 4; ++ dword &= ~(0xf); /* RdOdtTrnOnDly = read_odt_delay */ ++ dword |= (read_odt_delay & 0xf); ++ Set_NB32_DCT(dev, dct, 0x240, dword); ++ } else if (pDCTstat->LogicalCPUID & AMD_DR_Dx) { + if (pDCTstat->Speed == 3) + dword = 0x00000800; + else + dword = 0x00000000; + for (i=0; i < 2; i++) { +- reg_off = 0x100 * i; +- Set_NB32(dev, 0x98 + reg_off, 0x0D000030); +- Set_NB32(dev, 0x9C + reg_off, dword); +- Set_NB32(dev, 0x98 + reg_off, 0x4D040F30); +- +- /* FIXME +- * Mainboards need to be able to specify the maximum number of DIMMs installable per channel +- * For now assume a maximum of 2 DIMMs per channel can be installed +- */ +- uint8_t MaxDimmsInstallable = 2; ++ Set_NB32_DCT(dev, i, 0x98, 0x0D000030); ++ Set_NB32_DCT(dev, i, 0x9C, dword); ++ Set_NB32_DCT(dev, i, 0x98, 0x4D040F30); + + /* Obtain number of DIMMs on channel */ + uint8_t dimm_count = pDCTstat->MAdimms[i]; +@@ -3463,7 +5589,7 @@ static void mct_BeforeDramInit_Prod_D(struct MCTStatStruc *pMCTstat, + uint32_t odt_pattern_3; + + /* Select appropriate ODT pattern for installed DIMMs +- * Refer to the BKDG Rev. 3.62, page 120 onwards ++ * Refer to the Fam10h BKDG Rev. 
3.62, page 120 onwards + */ + if (pDCTstat->C_DCTPtr[i]->Status[DCT_STATUS_REGISTERED]) { + if (MaxDimmsInstallable == 2) { +@@ -3574,10 +5700,10 @@ static void mct_BeforeDramInit_Prod_D(struct MCTStatStruc *pMCTstat, + } + + /* Program ODT pattern */ +- Set_NB32_index_wait(dev, 0xf0 + reg_off, 0x180, odt_pattern_1); +- Set_NB32_index_wait(dev, 0xf0 + reg_off, 0x181, odt_pattern_0); +- Set_NB32_index_wait(dev, 0xf0 + reg_off, 0x182, odt_pattern_3); +- Set_NB32_index_wait(dev, 0xf0 + reg_off, 0x183, odt_pattern_2); ++ Set_NB32_index_wait_DCT(dev, i, 0xf0, 0x180, odt_pattern_1); ++ Set_NB32_index_wait_DCT(dev, i, 0xf0, 0x181, odt_pattern_0); ++ Set_NB32_index_wait_DCT(dev, i, 0xf0, 0x182, odt_pattern_3); ++ Set_NB32_index_wait_DCT(dev, i, 0xf0, 0x183, odt_pattern_2); + } + } + } +@@ -3585,34 +5711,32 @@ static void mct_BeforeDramInit_Prod_D(struct MCTStatStruc *pMCTstat, + static void mct_EnDllShutdownSR(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct) + { +- u32 reg_off = 0x100 * dct; + u32 dev = pDCTstat->dev_dct, val; + + /* Write 0000_07D0h to register F2x[1, 0]98_x4D0FE006 */ + if (pDCTstat->LogicalCPUID & (AMD_DR_DAC2_OR_C3)) { +- Set_NB32(dev, 0x9C + reg_off, 0x1C); +- Set_NB32(dev, 0x98 + reg_off, 0x4D0FE006); +- Set_NB32(dev, 0x9C + reg_off, 0x13D); +- Set_NB32(dev, 0x98 + reg_off, 0x4D0FE007); ++ Set_NB32_DCT(dev, dct, 0x9C, 0x1C); ++ Set_NB32_DCT(dev, dct, 0x98, 0x4D0FE006); ++ Set_NB32_DCT(dev, dct, 0x9C, 0x13D); ++ Set_NB32_DCT(dev, dct, 0x98, 0x4D0FE007); + +- val = Get_NB32(dev, 0x90 + reg_off); ++ val = Get_NB32_DCT(dev, dct, 0x90); + val &= ~(1 << 27/* DisDllShutdownSR */); +- Set_NB32(dev, 0x90 + reg_off, val); ++ Set_NB32_DCT(dev, dct, 0x90, val); + } + } + + static u32 mct_DisDllShutdownSR(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u32 DramConfigLo, u8 dct) + { +- u32 reg_off = 0x100 * dct; + u32 dev = pDCTstat->dev_dct; + + /* Write 0000_07D0h to register F2x[1, 0]98_x4D0FE006 */ + if (pDCTstat->LogicalCPUID & (AMD_DR_DAC2_OR_C3)) { +- Set_NB32(dev, 0x9C + reg_off, 0x7D0); +- Set_NB32(dev, 0x98 + reg_off, 0x4D0FE006); +- Set_NB32(dev, 0x9C + reg_off, 0x190); +- Set_NB32(dev, 0x98 + reg_off, 0x4D0FE007); ++ Set_NB32_DCT(dev, dct, 0x9C, 0x7D0); ++ Set_NB32_DCT(dev, dct, 0x98, 0x4D0FE006); ++ Set_NB32_DCT(dev, dct, 0x9C, 0x190); ++ Set_NB32_DCT(dev, dct, 0x98, 0x4D0FE007); + + DramConfigLo |= /* DisDllShutdownSR */ 1 << 27; + } +@@ -3704,52 +5828,61 @@ void ProgDramMRSReg_D(struct MCTStatStruc *pMCTstat, + DramMRS |= 1 << 23; + } + } +- /* +- DRAM MRS Register +- DrvImpCtrl: drive impedance control.01b(34 ohm driver; Ron34 = Rzq/7) +- */ +- DramMRS |= 1 << 2; +- /* Dram nominal termination: */ +- byte = pDCTstat->MAdimms[dct]; +- if (!(pDCTstat->Status & (1 << SB_Registered))) { +- DramMRS |= 1 << 7; /* 60 ohms */ +- if (byte & 2) { +- if (pDCTstat->Speed < 6) +- DramMRS |= 1 << 8; /* 40 ohms */ +- else +- DramMRS |= 1 << 9; /* 30 ohms */ ++ ++ if (is_fam15h()) { ++ DramMRS |= (0x1 << 23); /* PchgPDModeSel = 1 */ ++ } else { ++ /* ++ DRAM MRS Register ++ DrvImpCtrl: drive impedance control.01b(34 ohm driver; Ron34 = Rzq/7) ++ */ ++ DramMRS |= 1 << 2; ++ /* Dram nominal termination: */ ++ byte = pDCTstat->MAdimms[dct]; ++ if (!(pDCTstat->Status & (1 << SB_Registered))) { ++ DramMRS |= 1 << 7; /* 60 ohms */ ++ if (byte & 2) { ++ if (pDCTstat->Speed < 6) ++ DramMRS |= 1 << 8; /* 40 ohms */ ++ else ++ DramMRS |= 1 << 9; /* 30 ohms */ ++ } + } +- } +- /* Dram dynamic termination: Disable(1DIMM), 120ohm(>=2DIMM) */ +- if 
(!(pDCTstat->Status & (1 << SB_Registered))) { +- if (byte >= 2) { +- if (pDCTstat->Speed == 7) +- DramMRS |= 1 << 10; +- else +- DramMRS |= 1 << 11; ++ /* Dram dynamic termination: Disable(1DIMM), 120ohm(>=2DIMM) */ ++ if (!(pDCTstat->Status & (1 << SB_Registered))) { ++ if (byte >= 2) { ++ if (pDCTstat->Speed == 7) ++ DramMRS |= 1 << 10; ++ else ++ DramMRS |= 1 << 11; ++ } ++ } else { ++ DramMRS |= mct_DramTermDyn_RDimm(pMCTstat, pDCTstat, byte); + } +- } else { +- DramMRS |= mct_DramTermDyn_RDimm(pMCTstat, pDCTstat, byte); ++ ++ /* Qoff=0, output buffers enabled */ ++ /* Tcwl */ ++ DramMRS |= (pDCTstat->Speed - 4) << 20; ++ /* ASR=1, auto self refresh */ ++ /* SRT=0 */ ++ DramMRS |= 1 << 18; + } + + /* burst length control */ + if (pDCTstat->Status & (1 << SB_128bitmode)) + DramMRS |= 1 << 1; +- /* Qoff=0, output buffers enabled */ +- /* Tcwl */ +- DramMRS |= (pDCTstat->Speed - 4) << 20; +- /* ASR=1, auto self refresh */ +- /* SRT=0 */ +- DramMRS |= 1 << 18; +- +- dword = Get_NB32(pDCTstat->dev_dct, 0x100 * dct + 0x84); +- dword &= ~0x00FC2F8F; ++ ++ dword = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x84); ++ if (is_fam15h()) ++ dword &= ~0x00800003; ++ else ++ dword &= ~0x00fc2f8f; + dword |= DramMRS; +- Set_NB32(pDCTstat->dev_dct, 0x100 * dct + 0x84, dword); ++ Set_NB32_DCT(pDCTstat->dev_dct, dct, 0x84, dword); + } + +-void mct_SetDramConfigHi_D(struct DCTStatStruc *pDCTstat, u32 dct, +- u32 DramConfigHi) ++void mct_SetDramConfigHi_D(struct MCTStatStruc *pMCTstat, ++ struct DCTStatStruc *pDCTstat, u32 dct, u32 DramConfigHi) + { + /* Bug#15114: Comp. update interrupted by Freq. change can cause + * subsequent update to be invalid during any MemClk frequency change: +@@ -3778,45 +5911,86 @@ void mct_SetDramConfigHi_D(struct DCTStatStruc *pDCTstat, u32 dct, + */ + + u32 dev = pDCTstat->dev_dct; +- u32 index_reg = 0x98 + 0x100 * dct; ++ u32 index_reg = 0x98; + u32 index; + +- u32 val; ++ uint32_t dword; ++ ++ if (is_fam15h()) { ++ /* Initial setup for frequency change ++ * 9C_x0000_0004 must be configured before MemClkFreqVal is set ++ */ + +- index = 0x08; +- val = Get_NB32_index_wait(dev, index_reg, index); +- if (!(val & (1 << DisAutoComp))) +- Set_NB32_index_wait(dev, index_reg, index, val | (1 << DisAutoComp)); ++ /* Program D18F2x9C_x0D0F_E006_dct[1:0][PllLockTime] = 0x190 */ ++ dword = Get_NB32_index_wait_DCT(pDCTstat->dev_dct, dct, index_reg, 0x0d0fe006); ++ dword &= ~(0x0000ffff); ++ dword |= 0x00000190; ++ Set_NB32_index_wait_DCT(pDCTstat->dev_dct, dct, index_reg, 0x0d0fe006, dword); + +- mct_Wait(100); ++ dword = Get_NB32_DCT(dev, dct, 0x94); ++ dword &= ~(1 << MemClkFreqVal); ++ Set_NB32_DCT(dev, dct, 0x94, dword); + +- Set_NB32(dev, 0x94 + 0x100 * dct, DramConfigHi); ++ dword = DramConfigHi; ++ dword &= ~(1 << MemClkFreqVal); ++ Set_NB32_DCT(dev, dct, 0x94, dword); ++ ++ mctGet_PS_Cfg_D(pMCTstat, pDCTstat, dct); ++ set_2t_configuration(pMCTstat, pDCTstat, dct); ++ mct_BeforePlatformSpec(pMCTstat, pDCTstat, dct); ++ mct_PlatformSpec(pMCTstat, pDCTstat, dct); ++ } else { ++ index = 0x08; ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, index); ++ if (!(dword & (1 << DisAutoComp))) ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, index, dword | (1 << DisAutoComp)); ++ ++ mct_Wait(100); ++ } ++ ++ /* Program the DRAM Configuration High register */ ++ Set_NB32_DCT(dev, dct, 0x94, DramConfigHi); ++ ++ if (is_fam15h()) { ++ /* Wait until F2x[1, 0]94[FreqChgInProg]=0. 
*/ ++ do { ++ dword = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x94); ++ } while (dword & (1 << FreqChgInProg)); ++ ++ /* Program D18F2x9C_x0D0F_E006_dct[1:0][PllLockTime] = 0xf */ ++ dword = Get_NB32_index_wait_DCT(pDCTstat->dev_dct, dct, index_reg, 0x0d0fe006); ++ dword &= ~(0x0000ffff); ++ dword |= 0x0000000f; ++ Set_NB32_index_wait_DCT(pDCTstat->dev_dct, dct, index_reg, 0x0d0fe006, dword); ++ } + } + + static void mct_BeforeDQSTrain_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstatA) + { +- u8 Node; +- struct DCTStatStruc *pDCTstat; ++ if (!is_fam15h()) { ++ u8 Node; ++ struct DCTStatStruc *pDCTstat; + +- /* Errata 178 +- * +- * Bug#15115: Uncertainty In The Sync Chain Leads To Setup Violations +- * In TX FIFO +- * Solution: BIOS should program DRAM Control Register[RdPtrInit] = +- * 5h, (F2x[1, 0]78[3:0] = 5h). +- * Silicon Status: Fixed In Rev B0 +- * +- * Bug#15880: Determine validity of reset settings for DDR PHY timing. +- * Solution: At least, set WrDqs fine delay to be 0 for DDR3 training. +- */ +- for (Node = 0; Node < 8; Node++) { +- pDCTstat = pDCTstatA + Node; ++ /* Errata 178 ++ * ++ * Bug#15115: Uncertainty In The Sync Chain Leads To Setup Violations ++ * In TX FIFO ++ * Solution: BIOS should program DRAM Control Register[RdPtrInit] = ++ * 5h, (F2x[1, 0]78[3:0] = 5h). ++ * Silicon Status: Fixed In Rev B0 ++ * ++ * Bug#15880: Determine validity of reset settings for DDR PHY timing. ++ * Solution: At least, set WrDqs fine delay to be 0 for DDR3 training. ++ */ ++ for (Node = 0; Node < 8; Node++) { ++ pDCTstat = pDCTstatA + Node; + +- if (pDCTstat->NodePresent) { +- mct_BeforeDQSTrainSamp(pDCTstat); /* only Bx */ +- mct_ResetDLL_D(pMCTstat, pDCTstat, 0); +- mct_ResetDLL_D(pMCTstat, pDCTstat, 1); ++ if (pDCTstat->NodePresent) { ++ mct_BeforeDQSTrainSamp(pDCTstat); /* only Bx */ ++ mct_ResetDLL_D(pMCTstat, pDCTstat, 0); ++ mct_ResetDLL_D(pMCTstat, pDCTstat, 1); ++ } + } + } + } +@@ -3827,7 +6001,6 @@ static void mct_ResetDLL_D(struct MCTStatStruc *pMCTstat, + { + u8 Receiver; + u32 dev = pDCTstat->dev_dct; +- u32 reg_off = 0x100 * dct; + u32 addr; + u32 lo, hi; + u8 wrap32dis = 0; +@@ -3838,6 +6011,11 @@ static void mct_ResetDLL_D(struct MCTStatStruc *pMCTstat, + return; + } + ++ /* Skip reset DLL for Family 15h */ ++ if (is_fam15h()) { ++ return; ++ } ++ + addr = HWCR; + _RDMSR(addr, &lo, &hi); + if(lo & (1<<17)) { /* save the old value */ +@@ -3857,11 +6035,11 @@ static void mct_ResetDLL_D(struct MCTStatStruc *pMCTstat, + mct_Read1LTestPattern_D(pMCTstat, pDCTstat, addr); /* cache fills */ + + /* Write 0000_8000h to register F2x[1,0]9C_xD080F0C */ +- Set_NB32_index_wait(dev, 0x98 + reg_off, 0xD080F0C, 0x00008000); ++ Set_NB32_index_wait_DCT(dev, dct, 0x98, 0xD080F0C, 0x00008000); + mct_Wait(80); /* wait >= 300ns */ + + /* Write 0000_0000h to register F2x[1,0]9C_xD080F0C */ +- Set_NB32_index_wait(dev, 0x98 + reg_off, 0xD080F0C, 0x00000000); ++ Set_NB32_index_wait_DCT(dev, dct, 0x98, 0xD080F0C, 0x00000000); + mct_Wait(800); /* wait >= 2us */ + break; + } +@@ -3901,39 +6079,39 @@ static void mct_EnableDatIntlv_D(struct MCTStatStruc *pMCTstat, + static void SetDllSpeedUp_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct) + { +- u32 val; +- u32 dev = pDCTstat->dev_dct; +- u32 reg_off = 0x100 * dct; +- +- if (pDCTstat->Speed >= 7) { /* DDR1600 and above */ +- /* Set bit13 PowerDown to register F2x[1, 0]98_x0D080F10 */ +- Set_NB32(dev, reg_off + 0x98, 0x0D080F10); +- val = Get_NB32(dev, reg_off + 0x9C); +- val |= 1 < 13; +- Set_NB32(dev, reg_off + 
0x9C, val); +- Set_NB32(dev, reg_off + 0x98, 0x4D080F10); +- +- /* Set bit13 PowerDown to register F2x[1, 0]98_x0D080F11 */ +- Set_NB32(dev, reg_off + 0x98, 0x0D080F11); +- val = Get_NB32(dev, reg_off + 0x9C); +- val |= 1 < 13; +- Set_NB32(dev, reg_off + 0x9C, val); +- Set_NB32(dev, reg_off + 0x98, 0x4D080F11); +- +- /* Set bit13 PowerDown to register F2x[1, 0]98_x0D088F30 */ +- Set_NB32(dev, reg_off + 0x98, 0x0D088F30); +- val = Get_NB32(dev, reg_off + 0x9C); +- val |= 1 < 13; +- Set_NB32(dev, reg_off + 0x9C, val); +- Set_NB32(dev, reg_off + 0x98, 0x4D088F30); +- +- /* Set bit13 PowerDown to register F2x[1, 0]98_x0D08CF30 */ +- Set_NB32(dev, reg_off + 0x98, 0x0D08CF30); +- val = Get_NB32(dev, reg_off + 0x9C); +- val |= 1 < 13; +- Set_NB32(dev, reg_off + 0x9C, val); +- Set_NB32(dev, reg_off + 0x98, 0x4D08CF30); +- ++ if (!is_fam15h()) { ++ u32 val; ++ u32 dev = pDCTstat->dev_dct; ++ ++ if (pDCTstat->Speed >= mhz_to_memclk_config(800)) { /* DDR1600 and above */ ++ /* Set bit13 PowerDown to register F2x[1, 0]98_x0D080F10 */ ++ Set_NB32_DCT(dev, dct, 0x98, 0x0D080F10); ++ val = Get_NB32_DCT(dev, dct, 0x9C); ++ val |= 1 < 13; ++ Set_NB32_DCT(dev, dct, 0x9C, val); ++ Set_NB32_DCT(dev, dct, 0x98, 0x4D080F10); ++ ++ /* Set bit13 PowerDown to register F2x[1, 0]98_x0D080F11 */ ++ Set_NB32_DCT(dev, dct, 0x98, 0x0D080F11); ++ val = Get_NB32_DCT(dev, dct, 0x9C); ++ val |= 1 < 13; ++ Set_NB32_DCT(dev, dct, 0x9C, val); ++ Set_NB32_DCT(dev, dct, 0x98, 0x4D080F11); ++ ++ /* Set bit13 PowerDown to register F2x[1, 0]98_x0D088F30 */ ++ Set_NB32_DCT(dev, dct, 0x98, 0x0D088F30); ++ val = Get_NB32_DCT(dev, dct, 0x9C); ++ val |= 1 < 13; ++ Set_NB32_DCT(dev, dct, 0x9C, val); ++ Set_NB32_DCT(dev, dct, 0x98, 0x4D088F30); ++ ++ /* Set bit13 PowerDown to register F2x[1, 0]98_x0D08CF30 */ ++ Set_NB32_DCT(dev, dct, 0x98, 0x0D08CF30); ++ val = Get_NB32_DCT(dev, dct, 0x9C); ++ val |= 1 < 13; ++ Set_NB32_DCT(dev, dct, 0x9C, val); ++ Set_NB32_DCT(dev, dct, 0x98, 0x4D08CF30); ++ } + } + } + +@@ -3961,7 +6139,6 @@ static void SyncSetting(struct DCTStatStruc *pDCTstat) + static void AfterDramInit_D(struct DCTStatStruc *pDCTstat, u8 dct) { + + u32 val; +- u32 reg_off = 0x100 * dct; + u32 dev = pDCTstat->dev_dct; + + if (pDCTstat->LogicalCPUID & (AMD_DR_B2 | AMD_DR_B3)) { +@@ -3969,16 +6146,16 @@ static void AfterDramInit_D(struct DCTStatStruc *pDCTstat, u8 dct) { + val = Get_NB32(dev, 0x110); + if (!(val & (1 << DramEnabled))) { + /* If 50 us expires while DramEnable =0 then do the following */ +- val = Get_NB32(dev, 0x90 + reg_off); ++ val = Get_NB32_DCT(dev, dct, 0x90); + val &= ~(1 << Width128); /* Program Width128 = 0 */ +- Set_NB32(dev, 0x90 + reg_off, val); ++ Set_NB32_DCT(dev, dct, 0x90, val); + +- val = Get_NB32_index_wait(dev, 0x98 + reg_off, 0x05); /* Perform dummy CSR read to F2x09C_x05 */ ++ val = Get_NB32_index_wait_DCT(dev, dct, 0x98, 0x05); /* Perform dummy CSR read to F2x09C_x05 */ + + if (pDCTstat->GangedMode) { +- val = Get_NB32(dev, 0x90 + reg_off); ++ val = Get_NB32_DCT(dev, dct, 0x90); + val |= 1 << Width128; /* Program Width128 = 0 */ +- Set_NB32(dev, 0x90 + reg_off, val); ++ Set_NB32_DCT(dev, dct, 0x90, val); + } + } + } +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h +index a947c2d..50fbff7 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h +@@ -76,6 +76,8 @@ + /* #define PA_EXT_DCTADDL (((00 << 3)+5) << 8) */ /*Node x DCT function, Additional Registers PCI Address bits [15:0]*/ + + #define 
PA_NBMISC(Node) ((((0x18+Node) << 3)+3) << 12) /*Node 0 Misc PCI Address bits [15:0]*/ ++#define PA_LINK(Node) ((((0x18+Node) << 3)+4) << 12) /*Node 0 Link Control bits [15:0]*/ ++#define PA_NBCTL(Node) ((((0x18+Node) << 3)+5) << 12) /*Node 0 NB Control PCI Address bits [15:0]*/ + /* #define PA_NBDEVOP (((00 << 3)+3) << 8) */ /*Node 0 Misc PCI Address bits [15:0]*/ + + #define DCC_EN 1 /* X:2:0x94[19]*/ +@@ -129,7 +131,7 @@ + #define X4Dimm 12 /* func 2, offset 90h, bit 12*/ + #define UnBuffDimm 16 /* func 2, offset 90h, bit 16*/ + #define DimmEcEn 19 /* func 2, offset 90h, bit 19*/ +-#define MemClkFreqVal 3 /* func 2, offset 94h, bit 3*/ ++#define MemClkFreqVal ((is_fam15h())?7:3) /* func 2, offset 94h, bit 3 or 7*/ + #define RDqsEn 12 /* func 2, offset 94h, bit 12*/ + #define DisDramInterface 14 /* func 2, offset 94h, bit 14*/ + #define PowerDownEn 15 /* func 2, offset 94h, bit 15*/ +@@ -204,6 +206,7 @@ + #define JED_PROBEMSK 0x40 /*Analysis Probe installed*/ + #define JED_RDIMM 0x1 /* RDIMM */ + #define JED_MiniRDIMM 0x5 /* Mini-RDIMM */ ++ #define JED_LRDIMM 0xb /* Load-reduced DIMM */ + #define SPD_Density 4 /* Bank address bits,SDRAM capacity */ + #define SPD_Addressing 5 /* Row/Column address bits */ + #define SPD_Voltage 6 /* Supported voltage bitfield */ +@@ -297,6 +300,7 @@ struct MCTStatStruc { + of sub 4GB dram hole for HW remapping.*/ + u32 Sub4GCacheTop; /* If not zero, the 32-bit top of cacheable memory.*/ + u32 SysLimit; /* LIMIT[39:8] (system address)*/ ++ uint32_t TSCFreq; + } __attribute__((packed)); + + /*============================================================================= +@@ -320,7 +324,8 @@ struct MCTStatStruc { + + struct DCTStatStruc { /* A per Node structure*/ + /* DCTStatStruct_F - start */ +- u8 Node_ID; /* Node ID of current controller*/ ++ u8 Node_ID; /* Node ID of current controller */ ++ uint8_t stopDCT; /* Set if the DCT will be stopped */ + u8 ErrCode; /* Current error condition of Node + 0= no error + 1= Variance Error, DCT is running but not in an optimal configuration. +@@ -464,7 +469,7 @@ struct DCTStatStruc { /* A per Node structure*/ + /* CH A byte lane 0 - 7 maximum filtered window passing DQS delay value*/ + /* CH B byte lane 0 - 7 minimum filtered window passing DQS delay value*/ + /* CH B byte lane 0 - 7 maximum filtered window passing DQS delay value*/ +- u32 LogicalCPUID; /* The logical CPUID of the node*/ ++ uint64_t LogicalCPUID; /* The logical CPUID of the node*/ + u16 HostBiosSrvc1; /* Word sized general purpose field for use by host BIOS. Scratch space.*/ + u32 HostBiosSrvc2; /* Dword sized general purpose field for use by host BIOS. 
Scratch space.*/ + u16 DimmQRPresent; /* QuadRank DIMM present?*/ +@@ -558,12 +563,20 @@ struct DCTStatStruc { /* A per Node structure*/ + u8 ClToNB_flag; /* is used to restore ClLinesToNbDis bit after memory */ + u32 NodeSysBase; /* for channel interleave usage */ + ++ /* Fam15h specific backup variables */ ++ uint8_t SwNbPstateLoDis; ++ uint8_t NbPstateDisOnP0; ++ uint8_t NbPstateThreshold; ++ uint8_t NbPstateHi; ++ + /* New for LB Support */ + u8 NodePresent; + u32 dev_host; + u32 dev_map; + u32 dev_dct; + u32 dev_nbmisc; ++ u32 dev_link; ++ u32 dev_nbctl; + u8 TargetFreq; + u8 TargetCASL; + u8 CtrlWrd3; +@@ -596,9 +609,10 @@ struct DCTStatStruc { /* A per Node structure*/ + uint8_t DimmBanks[MAX_DIMMS_SUPPORTED]; + uint8_t DimmWidth[MAX_DIMMS_SUPPORTED]; + uint8_t DimmRegistered[MAX_DIMMS_SUPPORTED]; ++ uint8_t DimmLoadReduced[MAX_DIMMS_SUPPORTED]; + + uint64_t DimmManufacturerID[MAX_DIMMS_SUPPORTED]; +- char DimmPartNumber[MAX_DIMMS_SUPPORTED][SPD_PARTN_LENGTH]; ++ char DimmPartNumber[MAX_DIMMS_SUPPORTED][SPD_PARTN_LENGTH+1]; + uint16_t DimmRevisionNumber[MAX_DIMMS_SUPPORTED]; + uint32_t DimmSerialNumber[MAX_DIMMS_SUPPORTED]; + } __attribute__((packed)); +@@ -701,7 +715,64 @@ struct amd_s3_persistent_mct_channel_data { + /* Other (1 dword) */ + uint32_t f3x58; + +- /* TOTAL: 250 dwords */ ++ /* Family 15h-specific registers (90 dwords) */ ++ uint32_t f2x200; ++ uint32_t f2x204; ++ uint32_t f2x208; ++ uint32_t f2x20c; ++ uint32_t f2x210[4]; /* [nb pstate] */ ++ uint32_t f2x214; ++ uint32_t f2x218; ++ uint32_t f2x21c; ++ uint32_t f2x22c; ++ uint32_t f2x230; ++ uint32_t f2x234; ++ uint32_t f2x238; ++ uint32_t f2x23c; ++ uint32_t f2x240; ++ uint32_t f2x9cx0d0fe003; ++ uint32_t f2x9cx0d0fe013; ++ uint32_t f2x9cx0d0f0_8_0_1f[9]; /* [lane]*/ ++ uint32_t f2x9cx0d0f201f; ++ uint32_t f2x9cx0d0f211f; ++ uint32_t f2x9cx0d0f221f; ++ uint32_t f2x9cx0d0f801f; ++ uint32_t f2x9cx0d0f811f; ++ uint32_t f2x9cx0d0f821f; ++ uint32_t f2x9cx0d0fc01f; ++ uint32_t f2x9cx0d0fc11f; ++ uint32_t f2x9cx0d0fc21f; ++ uint32_t f2x9cx0d0f4009; ++ uint32_t f2x9cx0d0f0_8_0_02[9]; /* [lane]*/ ++ uint32_t f2x9cx0d0f0_8_0_06[9]; /* [lane]*/ ++ uint32_t f2x9cx0d0f0_8_0_0a[9]; /* [lane]*/ ++ uint32_t f2x9cx0d0f2002; ++ uint32_t f2x9cx0d0f2102; ++ uint32_t f2x9cx0d0f2202; ++ uint32_t f2x9cx0d0f8002; ++ uint32_t f2x9cx0d0f8006; ++ uint32_t f2x9cx0d0f800a; ++ uint32_t f2x9cx0d0f8102; ++ uint32_t f2x9cx0d0f8106; ++ uint32_t f2x9cx0d0f810a; ++ uint32_t f2x9cx0d0fc002; ++ uint32_t f2x9cx0d0fc006; ++ uint32_t f2x9cx0d0fc00a; ++ uint32_t f2x9cx0d0fc00e; ++ uint32_t f2x9cx0d0fc012; ++ uint32_t f2x9cx0d0f2031; ++ uint32_t f2x9cx0d0f2131; ++ uint32_t f2x9cx0d0f2231; ++ uint32_t f2x9cx0d0f8031; ++ uint32_t f2x9cx0d0f8131; ++ uint32_t f2x9cx0d0f8231; ++ uint32_t f2x9cx0d0fc031; ++ uint32_t f2x9cx0d0fc131; ++ uint32_t f2x9cx0d0fc231; ++ uint32_t f2x9cx0d0f0_0_f_31[9]; /* [lane] */ ++ uint32_t f2x9cx0d0f8021; ++ ++ /* TOTAL: 340 dwords */ + } __attribute__((packed)); + + struct amd_s3_persistent_node_data { +@@ -746,18 +817,19 @@ struct amd_s3_persistent_data { + Local Configuration Status (DCTStatStruc.Status[31:0]) + ===============================================================================*/ + #define SB_Registered 0 /* All DIMMs are Registered*/ +-#define SB_ECCDIMMs 1 /* All banks ECC capable*/ +-#define SB_PARDIMMs 2 /* All banks Addr/CMD Parity capable*/ +-#define SB_DiagClks 3 /* Jedec ALL slots clock enable diag mode*/ +-#define SB_128bitmode 4 /* DCT in 128-bit mode operation*/ +-#define SB_64MuxedMode 5 /* DCT in 64-bit 
mux'ed mode.*/ +-#define SB_2TMode 6 /* 2T CMD timing mode is enabled.*/ +-#define SB_SWNodeHole 7 /* Remapping of Node Base on this Node to create a gap.*/ +-#define SB_HWHole 8 /* Memory Hole created on this Node using HW remapping.*/ +-#define SB_Over400MHz 9 /* DCT freq >= 400MHz flag*/ +-#define SB_DQSPos_Pass2 10 /* Using for TrainDQSPos DIMM0/1, when freq>=400MHz*/ +-#define SB_DQSRcvLimit 11 /* Using for DQSRcvEnTrain to know we have reached to upper bound.*/ +-#define SB_ExtConfig 12 /* Indicator the default setting for extend PCI configuration support*/ ++#define SB_LoadReduced 1 /* All DIMMs are Load-Reduced*/ ++#define SB_ECCDIMMs 2 /* All banks ECC capable*/ ++#define SB_PARDIMMs 3 /* All banks Addr/CMD Parity capable*/ ++#define SB_DiagClks 4 /* Jedec ALL slots clock enable diag mode*/ ++#define SB_128bitmode 5 /* DCT in 128-bit mode operation*/ ++#define SB_64MuxedMode 6 /* DCT in 64-bit mux'ed mode.*/ ++#define SB_2TMode 7 /* 2T CMD timing mode is enabled.*/ ++#define SB_SWNodeHole 8 /* Remapping of Node Base on this Node to create a gap.*/ ++#define SB_HWHole 9 /* Memory Hole created on this Node using HW remapping.*/ ++#define SB_Over400MHz 10 /* DCT freq >= 400MHz flag*/ ++#define SB_DQSPos_Pass2 11 /* Using for TrainDQSPos DIMM0/1, when freq>=400MHz*/ ++#define SB_DQSRcvLimit 12 /* Using for DQSRcvEnTrain to know we have reached to upper bound.*/ ++#define SB_ExtConfig 13 /* Indicator the default setting for extend PCI configuration support*/ + + + /*=============================================================================== +@@ -775,17 +847,18 @@ struct amd_s3_persistent_data { + 266=266MHz (DDR533) + 333=333MHz (DDR667) + 400=400MHz (DDR800)*/ +-#define NV_ECC_CAP 4 /* Bus ECC capable (1-bits) ++#define NV_MIN_MEMCLK 4 /* Minimum platform demonstrated Memclock (10-bits) */ ++#define NV_ECC_CAP 5 /* Bus ECC capable (1-bits) + 0=Platform not capable + 1=Platform is capable*/ +-#define NV_4RANKType 5 /* Quad Rank DIMM slot type (2-bits) ++#define NV_4RANKType 6 /* Quad Rank DIMM slot type (2-bits) + 0=Normal + 1=R4 (4-Rank Registered DIMMs in AMD server configuration) + 2=S4 (Unbuffered SO-DIMMs)*/ +-#define NV_BYPMAX 6 /* Value to set DcqBypassMax field (See Function 2, Offset 94h, [27:24] of BKDG for field definition). ++#define NV_BYPMAX 7 /* Value to set DcqBypassMax field (See Function 2, Offset 94h, [27:24] of BKDG for field definition). + 4=4 times bypass (normal for non-UMA systems) + 7=7 times bypass (normal for UMA systems)*/ +-#define NV_RDWRQBYP 7 /* Value to set RdWrQByp field (See Function 2, Offset A0h, [3:2] of BKDG for field definition). ++#define NV_RDWRQBYP 8 /* Value to set RdWrQByp field (See Function 2, Offset A0h, [3:2] of BKDG for field definition). 
+ 2=8 times (normal for non-UMA systems) + 3=16 times (normal for UMA systems)*/ + +@@ -848,8 +921,9 @@ struct amd_s3_persistent_data { + #define NV_ECCRedir 54 /* Dram ECC Redirection enable*/ + #define NV_DramBKScrub 55 /* Dram ECC Background Scrubber CTL*/ + #define NV_L2BKScrub 56 /* L2 ECC Background Scrubber CTL*/ +-#define NV_DCBKScrub 57 /* DCache ECC Background Scrubber CTL*/ +-#define NV_CS_SpareCTL 58 /* Chip Select Spare Control bit 0: ++#define NV_L3BKScrub 57 /* L3 ECC Background Scrubber CTL*/ ++#define NV_DCBKScrub 58 /* DCache ECC Background Scrubber CTL*/ ++#define NV_CS_SpareCTL 59 /* Chip Select Spare Control bit 0: + 0=disable Spare + 1=enable Spare */ + /* Chip Select Spare Control bit 1-4: +@@ -900,10 +974,12 @@ void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u16 RcvrEnDly, u8 FinalVa + void SetEccDQSRcvrEn_D(struct DCTStatStruc *pDCTstat, u8 Channel); + void mctGet_PS_Cfg_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u32 dct); + void InterleaveBanks_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct); +-void mct_SetDramConfigHi_D(struct DCTStatStruc *pDCTstat, u32 dct, u32 DramConfigHi); ++void mct_SetDramConfigHi_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u32 dct, u32 DramConfigHi); + void mct_DramInit_Hw_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct); + void mct_SetClToNB_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat); + void mct_SetWbEnhWsbDis_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat); ++void mct_ForceNBPState0_En_Fam15(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat); ++void mct_ForceNBPState0_Dis_Fam15(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat); + void mct_TrainRcvrEn_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 Pass); + void mct_EnableDimmEccEn_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 _DisableDramECC); + u32 procOdtWorkaround(struct DCTStatStruc *pDCTstat, u32 dct, u32 val); +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mct_d_gcc.h b/src/northbridge/amd/amdmct/mct_ddr3/mct_d_gcc.h +index c40ea1a..f6aa755 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mct_d_gcc.h ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mct_d_gcc.h +@@ -98,6 +98,15 @@ static u32 bsf(u32 x) + + u32 SetUpperFSbase(u32 addr_hi); + ++static void proc_MFENCE(void) ++{ ++ __asm__ volatile ( ++ "outb %%al, $0xed\n\t" /* _EXECFENCE */ ++ "mfence\n\t" ++ :::"memory" ++ ); ++} ++ + static void proc_CLFLUSH(u32 addr_hi) + { + SetUpperFSbase(addr_hi); +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctardk5.c b/src/northbridge/amd/amdmct/mct_ddr3/mctardk5.c +index 126642b..3df262b 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mctardk5.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctardk5.c +@@ -2,6 +2,7 @@ + * This file is part of the coreboot project. + * + * Copyright (C) 2010 Advanced Micro Devices, Inc. ++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -17,6 +18,8 @@ + * Foundation, Inc. 
+ */ + ++/* AM3/ASB2/C32/G34 DDR3 */ ++ + static void Get_ChannelPS_Cfg0_D(u8 MAAdimms, u8 Speed, u8 MAAload, + u32 *AddrTmgCTL, u32 *ODC_CTL, + u8 *CMDmode); +@@ -24,17 +27,23 @@ static void Get_ChannelPS_Cfg0_D(u8 MAAdimms, u8 Speed, u8 MAAload, + void mctGet_PS_Cfg_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u32 dct) + { +- Get_ChannelPS_Cfg0_D(pDCTstat->MAdimms[dct], pDCTstat->Speed, +- pDCTstat->MAload[dct], +- &(pDCTstat->CH_ADDR_TMG[dct]), &(pDCTstat->CH_ODC_CTL[dct]), +- &pDCTstat->_2Tmode); ++ if (is_fam15h()) { ++ pDCTstat->CH_ADDR_TMG[dct] = fam15h_address_timing_compensation_code(pDCTstat, dct); ++ pDCTstat->CH_ODC_CTL[dct] = fam15h_output_driver_compensation_code(pDCTstat, dct); ++ pDCTstat->_2Tmode = fam15h_slow_access_mode(pDCTstat, dct); ++ } else { ++ Get_ChannelPS_Cfg0_D(pDCTstat->MAdimms[dct], pDCTstat->Speed, ++ pDCTstat->MAload[dct], ++ &(pDCTstat->CH_ADDR_TMG[dct]), &(pDCTstat->CH_ODC_CTL[dct]), ++ &pDCTstat->_2Tmode); ++ ++ pDCTstat->CH_ODC_CTL[dct] |= 0x20000000; /* 60ohms */ ++ } + + pDCTstat->CH_EccDQSLike[0] = 0x0403; + pDCTstat->CH_EccDQSScale[0] = 0x70; + pDCTstat->CH_EccDQSLike[1] = 0x0403; + pDCTstat->CH_EccDQSScale[1] = 0x70; +- +- pDCTstat->CH_ODC_CTL[dct] |= 0x20000000; /* 60ohms */ + } + + /* +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctcsi_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mctcsi_d.c +index f1fd7a5..a1cdfa6 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mctcsi_d.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctcsi_d.c +@@ -2,6 +2,7 @@ + * This file is part of the coreboot project. + * + * Copyright (C) 2010 Advanced Micro Devices, Inc. ++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -35,7 +36,6 @@ void InterleaveBanks_D(struct MCTStatStruc *pMCTstat, + + u32 dev; + u32 reg; +- u32 reg_off; + u32 val; + u32 val_lo, val_hi; + +@@ -44,16 +44,15 @@ void InterleaveBanks_D(struct MCTStatStruc *pMCTstat, + EnChipSels = 0; + + dev = pDCTstat->dev_dct; +- reg_off = 0x100 * dct; + + ChipSel = 0; /* Find out if current configuration is capable */ + while (DoIntlv && (ChipSel < MAX_CS_SUPPORTED)) { +- reg = 0x40+(ChipSel<<2) + reg_off; /* Dram CS Base 0 */ +- val = Get_NB32(dev, reg); ++ reg = 0x40+(ChipSel<<2); /* Dram CS Base 0 */ ++ val = Get_NB32_DCT(dev, dct, reg); + if ( val & (1<<CSEnable)) { + EnChipSels++; +- reg = 0x60+((ChipSel>>1)<<2)+reg_off; /*Dram CS Mask 0 */ +- val = Get_NB32(dev, reg); ++ reg = 0x60+((ChipSel>>1)<<2); /*Dram CS Mask 0 */ ++ val = Get_NB32_DCT(dev, dct, reg); + val >>= 19; + val &= 0x3ff; + val++; +@@ -63,8 +62,8 @@ void InterleaveBanks_D(struct MCTStatStruc *pMCTstat, + /*If mask sizes not same then skip */ + if (val != MemSize) + break; +- reg = 0x80 + reg_off; /*Dram Bank Addressing */ +- val = Get_NB32(dev, reg); ++ reg = 0x80; /*Dram Bank Addressing */ ++ val = Get_NB32_DCT(dev, dct, reg); + val >>= (ChipSel>>1)<<2; + val &= 0x0f; + if(EnChipSels == 1) +@@ -103,8 +102,8 @@ void InterleaveBanks_D(struct MCTStatStruc *pMCTstat, + BitDelta = bsf(AddrHiMask) - bsf(AddrLoMask); + + for (ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel++) { +- reg = 0x40+(ChipSel<<2) + reg_off; /*Dram CS Base 0 */ +- val = Get_NB32(dev, reg); ++ reg = 0x40+(ChipSel<<2); /*Dram CS Base 0 */ ++ val = Get_NB32_DCT(dev, dct, reg); + if (val & 3) { + val_lo = val & AddrLoMask; + val_hi = val & AddrHiMask; +@@ -114,13 
+113,13 @@ void InterleaveBanks_D(struct MCTStatStruc *pMCTstat, + val_hi >>= BitDelta; + val |= val_lo; + val |= val_hi; +- Set_NB32(dev, reg, val); ++ Set_NB32_DCT(dev, dct, reg, val); + + if(ChipSel & 1) + continue; + +- reg = 0x60 + ((ChipSel>>1)<<2) + reg_off; /*Dram CS Mask 0 */ +- val = Get_NB32(dev, reg); ++ reg = 0x60 + ((ChipSel>>1)<<2); /*Dram CS Mask 0 */ ++ val = Get_NB32_DCT(dev, dct, reg); + val_lo = val & AddrLoMask; + val_hi = val & AddrHiMask; + val &= AddrLoMaskN; +@@ -129,7 +128,7 @@ void InterleaveBanks_D(struct MCTStatStruc *pMCTstat, + val_hi >>= BitDelta; + val |= val_lo; + val |= val_hi; +- Set_NB32(dev, reg, val); ++ Set_NB32_DCT(dev, dct, reg, val); + } + } + } /* DoIntlv */ +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c +index cc2f43a..740edae 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c +@@ -18,6 +18,12 @@ + * Foundation, Inc. + */ + ++static void write_dqs_receiver_enable_control_registers(uint16_t* current_total_delay, ++ uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg); ++ ++static void read_read_dqs_timing_control_registers(uint16_t* current_total_delay, ++ uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg); ++ + static void CalcEccDQSPos_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u16 like, + u8 scale, u8 ChipSel); +@@ -37,7 +43,7 @@ static void FlushDQSTestPattern_D(struct DCTStatStruc *pDCTstat, + u32 addr_lo); + static void SetTargetWTIO_D(u32 TestAddr); + static void ResetTargetWTIO_D(void); +-void ResetDCTWrPtr_D(u32 dev, u32 index_reg, u32 index); ++void ResetDCTWrPtr_D(u32 dev, uint8_t dct, u32 index_reg, u32 index); + u8 mct_DisableDimmEccEn_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat); + static void mct_SetDQSDelayCSR_D(struct MCTStatStruc *pMCTstat, +@@ -54,6 +60,7 @@ static void proc_IOCLFLUSH_D(u32 addr_hi); + static void StoreDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 ChipSel); + + #define DQS_TRAIN_DEBUG 0 ++// #define PRINT_PASS_FAIL_BITMAPS 1 + + static void print_debug_dqs(const char *str, u32 val, u8 level) + { +@@ -198,18 +205,20 @@ void TrainReceiverEn_D(struct MCTStatStruc *pMCTstat, + pDCTstat = pDCTstatA + Node; + + if (pDCTstat->DCTSysLimit) { +- val = Get_NB32(pDCTstat->dev_dct, 0x78); +- val |= 1 <<DqsRcvEnTrain; +- Set_NB32(pDCTstat->dev_dct, 0x78, val); +- val = Get_NB32(pDCTstat->dev_dct, 0x78 + 0x100); +- val |= 1 <<DqsRcvEnTrain; +- Set_NB32(pDCTstat->dev_dct, 0x78 + 0x100, val); ++ if (!is_fam15h()) { ++ val = Get_NB32_DCT(pDCTstat->dev_dct, 0, 0x78); ++ val |= 1 <<DqsRcvEnTrain; ++ Set_NB32_DCT(pDCTstat->dev_dct, 0, 0x78, val); ++ val = Get_NB32_DCT(pDCTstat->dev_dct, 1, 0x78); ++ val |= 1 <<DqsRcvEnTrain; ++ Set_NB32_DCT(pDCTstat->dev_dct, 1, 0x78, val); ++ } + mct_TrainRcvrEn_D(pMCTstat, pDCTstat, Pass); + } + } + } + +-static void SetEccDQSRdWrPos_D(struct MCTStatStruc *pMCTstat, ++static void SetEccDQSRdWrPos_D_Fam10(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 ChipSel) + { + u8 channel; +@@ -268,68 +277,150 @@ static void CalcEccDQSPos_D(struct MCTStatStruc *pMCTstat, + pDCTstat->DQSDelay = (u8)DQSDelay; + } + +-static void write_dqs_write_data_timing_registers(uint16_t* delay, uint32_t dev, uint8_t dimm, uint32_t index_reg) ++static void read_dqs_write_data_timing_registers(uint16_t* delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg) ++{ ++ 
uint32_t dword; ++ uint32_t mask; ++ ++ if (is_fam15h()) ++ mask = 0xff; ++ else ++ mask = 0x7f; ++ ++ /* Lanes 0 - 3 */ ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x1 | (dimm << 8)); ++ delay[3] = (dword >> 24) & mask; ++ delay[2] = (dword >> 16) & mask; ++ delay[1] = (dword >> 8) & mask; ++ delay[0] = dword & mask; ++ ++ /* Lanes 4 - 7 */ ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x2 | (dimm << 8)); ++ delay[7] = (dword >> 24) & mask; ++ delay[6] = (dword >> 16) & mask; ++ delay[5] = (dword >> 8) & mask; ++ delay[4] = dword & mask; ++ ++ /* Lane 8 (ECC) */ ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x3 | (dimm << 8)); ++ delay[8] = dword & mask; ++} ++ ++static void write_dqs_write_data_timing_registers(uint16_t* delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg) + { + uint32_t dword; ++ uint32_t mask; ++ ++ if (is_fam15h()) ++ mask = 0xff; ++ else ++ mask = 0x7f; + + /* Lanes 0 - 3 */ +- dword = Get_NB32_index_wait(dev, index_reg, 0x1 | (dimm << 8)); +- dword &= ~0x7f7f7f7f; +- dword |= (delay[3] & 0x7f) << 24; +- dword |= (delay[2] & 0x7f) << 16; +- dword |= (delay[1] & 0x7f) << 8; +- dword |= delay[0] & 0x7f; +- Set_NB32_index_wait(dev, index_reg, 0x1 | (dimm << 8), dword); ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x1 | (dimm << 8)); ++ dword &= ~(mask << 24); ++ dword &= ~(mask << 16); ++ dword &= ~(mask << 8); ++ dword &= ~mask; ++ dword |= (delay[3] & mask) << 24; ++ dword |= (delay[2] & mask) << 16; ++ dword |= (delay[1] & mask) << 8; ++ dword |= delay[0] & mask; ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x1 | (dimm << 8), dword); + + /* Lanes 4 - 7 */ +- dword = Get_NB32_index_wait(dev, index_reg, 0x2 | (dimm << 8)); +- dword &= ~0x7f7f7f7f; +- dword |= (delay[7] & 0x7f) << 24; +- dword |= (delay[6] & 0x7f) << 16; +- dword |= (delay[5] & 0x7f) << 8; +- dword |= delay[4] & 0x7f; +- Set_NB32_index_wait(dev, index_reg, 0x2 | (dimm << 8), dword); ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x2 | (dimm << 8)); ++ dword &= ~(mask << 24); ++ dword &= ~(mask << 16); ++ dword &= ~(mask << 8); ++ dword &= ~mask; ++ dword |= (delay[7] & mask) << 24; ++ dword |= (delay[6] & mask) << 16; ++ dword |= (delay[5] & mask) << 8; ++ dword |= delay[4] & mask; ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x2 | (dimm << 8), dword); + + /* Lane 8 (ECC) */ +- dword = Get_NB32_index_wait(dev, index_reg, 0x3 | (dimm << 8)); +- dword &= ~0x0000007f; +- dword |= delay[8] & 0x7f; +- Set_NB32_index_wait(dev, index_reg, 0x3 | (dimm << 8), dword); ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x3 | (dimm << 8)); ++ dword &= ~mask; ++ dword |= delay[8] & mask; ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x3 | (dimm << 8), dword); + } + +-static void write_dqs_read_data_timing_registers(uint16_t* delay, uint32_t dev, uint8_t dimm, uint32_t index_reg) ++static void read_dqs_read_data_timing_registers(uint16_t* delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg) + { + uint32_t dword; ++ uint32_t mask; ++ ++ if (is_fam15h()) ++ mask = 0x3e; ++ else ++ mask = 0x3f; + + /* Lanes 0 - 3 */ +- dword = Get_NB32_index_wait(dev, index_reg, 0x5 | (dimm << 8)); +- dword &= ~0x3f3f3f3f; +- dword |= (delay[3] & 0x3f) << 24; +- dword |= (delay[2] & 0x3f) << 16; +- dword |= (delay[1] & 0x3f) << 8; +- dword |= delay[0] & 0x3f; +- Set_NB32_index_wait(dev, index_reg, 0x5 | (dimm << 8), dword); ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x5 | (dimm << 8)); ++ delay[3] = (dword >> 24) & mask; ++ 
delay[2] = (dword >> 16) & mask; ++ delay[1] = (dword >> 8) & mask; ++ delay[0] = dword & mask; + + /* Lanes 4 - 7 */ +- dword = Get_NB32_index_wait(dev, index_reg, 0x6 | (dimm << 8)); +- dword &= ~0x3f3f3f3f; +- dword |= (delay[7] & 0x3f) << 24; +- dword |= (delay[6] & 0x3f) << 16; +- dword |= (delay[5] & 0x3f) << 8; +- dword |= delay[4] & 0x3f; +- Set_NB32_index_wait(dev, index_reg, 0x6 | (dimm << 8), dword); ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x6 | (dimm << 8)); ++ delay[7] = (dword >> 24) & mask; ++ delay[6] = (dword >> 16) & mask; ++ delay[5] = (dword >> 8) & mask; ++ delay[4] = dword & mask; + + /* Lane 8 (ECC) */ +- dword = Get_NB32_index_wait(dev, index_reg, 0x7 | (dimm << 8)); +- dword &= ~0x0000003f; +- dword |= delay[8] & 0x3f; +- Set_NB32_index_wait(dev, index_reg, 0x7 | (dimm << 8), dword); ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x7 | (dimm << 8)); ++ delay[8] = dword & mask; ++} ++ ++static void write_dqs_read_data_timing_registers(uint16_t* delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg) ++{ ++ uint32_t dword; ++ uint32_t mask; ++ ++ if (is_fam15h()) ++ mask = 0x3e; ++ else ++ mask = 0x3f; ++ ++ /* Lanes 0 - 3 */ ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x5 | (dimm << 8)); ++ dword &= ~(mask << 24); ++ dword &= ~(mask << 16); ++ dword &= ~(mask << 8); ++ dword &= ~mask; ++ dword |= (delay[3] & mask) << 24; ++ dword |= (delay[2] & mask) << 16; ++ dword |= (delay[1] & mask) << 8; ++ dword |= delay[0] & mask; ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x5 | (dimm << 8), dword); ++ ++ /* Lanes 4 - 7 */ ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x6 | (dimm << 8)); ++ dword &= ~(mask << 24); ++ dword &= ~(mask << 16); ++ dword &= ~(mask << 8); ++ dword &= ~mask; ++ dword |= (delay[7] & mask) << 24; ++ dword |= (delay[6] & mask) << 16; ++ dword |= (delay[5] & mask) << 8; ++ dword |= delay[4] & mask; ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x6 | (dimm << 8), dword); ++ ++ /* Lane 8 (ECC) */ ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x7 | (dimm << 8)); ++ dword &= ~mask; ++ dword |= delay[8] & mask; ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x7 | (dimm << 8), dword); + } + + /* DQS Position Training + * Algorithm detailed in the Fam10h BKDG Rev. 
3.62 section 2.8.9.9.3 + */ +-static void TrainDQSRdWrPos_D(struct MCTStatStruc *pMCTstat, ++static void TrainDQSRdWrPos_D_Fam10(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat) + { + u32 Errors; +@@ -406,7 +497,7 @@ static void TrainDQSRdWrPos_D(struct MCTStatStruc *pMCTstat, + if (pDCTstat->DIMMValidDCT[Channel] == 0) /* mct_BeforeTrainDQSRdWrPos_D */ + continue; + +- index_reg = 0x98 + 0x100 * Channel; ++ index_reg = 0x98; + + dual_rank = 0; + Receiver = mct_InitReceiver_D(pDCTstat, Channel); +@@ -462,7 +553,7 @@ static void TrainDQSRdWrPos_D(struct MCTStatStruc *pMCTstat, + break; + + /* Commit the current Write Data Timing settings to the hardware registers */ +- write_dqs_write_data_timing_registers(current_write_dqs_delay, dev, (Receiver >> 1), index_reg); ++ write_dqs_write_data_timing_registers(current_write_dqs_delay, dev, Channel, (Receiver >> 1), index_reg); + + /* Write the DRAM training pattern to the base test address */ + WriteDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8); +@@ -479,7 +570,7 @@ static void TrainDQSRdWrPos_D(struct MCTStatStruc *pMCTstat, + current_read_dqs_delay[lane] = test_read_dqs_delay; + + /* Commit the current Read DQS Timing Control settings to the hardware registers */ +- write_dqs_read_data_timing_registers(current_read_dqs_delay, dev, (Receiver >> 1), index_reg); ++ write_dqs_read_data_timing_registers(current_read_dqs_delay, dev, Channel, (Receiver >> 1), index_reg); + + /* Initialize test result variable */ + bytelane_test_results = 0xff; +@@ -545,7 +636,7 @@ static void TrainDQSRdWrPos_D(struct MCTStatStruc *pMCTstat, + passing_dqs_delay_found[lane] = 1; + + /* Commit the current Read DQS Timing Control settings to the hardware registers */ +- write_dqs_read_data_timing_registers(current_read_dqs_delay, dev, (Receiver >> 1), index_reg); ++ write_dqs_read_data_timing_registers(current_read_dqs_delay, dev, Channel, (Receiver >> 1), index_reg); + + /* Exit the DRAM Write Data Timing Loop */ + write_dqs_delay_stepping_done[lane] = 1; +@@ -579,7 +670,7 @@ static void TrainDQSRdWrPos_D(struct MCTStatStruc *pMCTstat, + current_write_dqs_delay[lane] = test_write_dqs_delay; + + /* Commit the current Write Data Timing settings to the hardware registers */ +- write_dqs_write_data_timing_registers(current_write_dqs_delay, dev, (Receiver >> 1), index_reg); ++ write_dqs_write_data_timing_registers(current_write_dqs_delay, dev, Channel, (Receiver >> 1), index_reg); + + /* Write the DRAM training pattern to the base test address */ + WriteDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8); +@@ -674,7 +765,7 @@ static void TrainDQSRdWrPos_D(struct MCTStatStruc *pMCTstat, + current_read_dqs_delay[lane] = (best_pos + (best_count / 2)); + + /* Commit the current Read DQS Timing Control settings to the hardware registers */ +- write_dqs_read_data_timing_registers(current_read_dqs_delay, dev, (Receiver >> 1), index_reg); ++ write_dqs_read_data_timing_registers(current_read_dqs_delay, dev, Channel, (Receiver >> 1), index_reg); + + /* Save the final Read DQS Timing Control settings for later use */ + pDCTstat->CH_D_DIR_B_DQS[Channel][Receiver >> 1][DQS_READDIR][lane] = current_read_dqs_delay[lane]; +@@ -717,7 +808,7 @@ static void TrainDQSRdWrPos_D(struct MCTStatStruc *pMCTstat, + current_write_dqs_delay[lane] = (best_pos + (best_count / 2)); + + /* Commit the current Write Data Timing settings to the hardware registers */ +- write_dqs_write_data_timing_registers(current_write_dqs_delay, dev, (Receiver >> 1), index_reg); ++ 
write_dqs_write_data_timing_registers(current_write_dqs_delay, dev, Channel, (Receiver >> 1), index_reg); + + /* Save the final Write Data Timing settings for later use */ + pDCTstat->CH_D_DIR_B_DQS[Channel][Receiver >> 1][DQS_WRITEDIR][lane] = current_write_dqs_delay[lane]; +@@ -787,6 +878,831 @@ static void TrainDQSRdWrPos_D(struct MCTStatStruc *pMCTstat, + printk(BIOS_DEBUG, "TrainDQSRdWrPos: Done\n\n"); + } + ++/* Calcuate and set MaxRdLatency ++ * Algorithm detailed in the Fam15h BKDG Rev. 3.14 section 2.10.5.8.5 ++ */ ++static void Calc_SetMaxRdLatency_D_Fam15(struct MCTStatStruc *pMCTstat, ++ struct DCTStatStruc *pDCTstat, uint8_t dct) ++{ ++ uint8_t dimm; ++ uint8_t lane; ++ uint32_t dword; ++ uint32_t dword2; ++ uint32_t max_delay; ++ uint8_t mem_clk = 0; ++ uint8_t nb_pstate; ++ uint32_t nb_clk; ++ uint32_t p = 0; ++ uint32_t n = 0; ++ uint32_t t = 0; ++ uint16_t current_phy_phase_delay[MAX_BYTE_LANES]; ++ uint16_t current_read_dqs_delay[MAX_BYTE_LANES]; ++ ++ uint32_t index_reg = 0x98; ++ uint32_t dev = pDCTstat->dev_dct; ++ uint16_t fam15h_freq_tab[] = {0, 0, 0, 0, 333, 0, 400, 0, 0, 0, 533, 0, 0, 0, 667, 0, 0, 0, 800, 0, 0, 0, 933}; ++ ++ /* P is specified in PhyCLKs (1/2 MEMCLKs) */ ++ for (nb_pstate = 0; nb_pstate < 2; nb_pstate++) { ++ /* 2.10.5.8.5 (2) */ ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000004); ++ if ((!(dword & (0x1 << 21))) && (!(dword & (0x1 << 13))) && (!(dword & (0x1 << 5)))) ++ p += 1; ++ else ++ p += 2; ++ ++ /* 2.10.5.8.5 (3) */ ++ dword = Get_NB32_DCT_NBPstate(dev, dct, nb_pstate, 0x210) & 0xf; /* Retrieve RdPtrInit */ ++ p += (9 - dword); ++ ++ /* 2.10.5.8.5 (4) */ ++ p += 5; ++ ++ /* 2.10.5.8.5 (5) */ ++ dword = Get_NB32_DCT(dev, dct, 0xa8); ++ dword2 = Get_NB32_DCT(dev, dct, 0x90); ++ if ((!(dword & (0x1 << 5))) && (!(dword2 & (0x1 << 16)))) ++ p += 2; ++ ++ /* 2.10.5.8.5 (6) */ ++ dword = Get_NB32_DCT(dev, dct, 0x200) & 0x1f; /* Retrieve Tcl */ ++ p += (2 * (dword - 1)); ++ ++ /* 2.10.5.8.5 (7) */ ++ max_delay = 0; ++ for (dimm = 0; dimm < 4; dimm++) { ++ if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, dct, dimm * 2)) ++ continue; ++ ++ read_dqs_receiver_enable_control_registers(current_phy_phase_delay, dev, dct, dimm, index_reg); ++ read_read_dqs_timing_control_registers(current_read_dqs_delay, dev, dct, dimm, index_reg); ++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) ++ if ((current_phy_phase_delay[lane] + current_read_dqs_delay[lane]) > max_delay) ++ max_delay = (current_phy_phase_delay[lane] + current_read_dqs_delay[lane]); ++ } ++ p += (max_delay >> 5); ++ ++ /* 2.10.5.8.5 (8) */ ++ p += 5; ++ ++ /* 2.10.5.8.5 (9) */ ++ t += 800; ++ ++ /* 2.10.5.8.5 (10) */ ++ mem_clk = Get_NB32_DCT(dev, dct, 0x94) & 0x1f; ++ dword = Get_NB32(pDCTstat->dev_nbctl, (0x160 + (nb_pstate * 4))); /* Retrieve NbDid, NbFid */ ++ nb_clk = (200 * (((dword >> 1) & 0x1f) + 0x4)) / (((dword >> 7) & 0x1)?2:1); ++ n = (((((uint64_t)p * 1000000000000ULL)/(((uint64_t)fam15h_freq_tab[mem_clk] * 1000000ULL) * 2)) + ((uint64_t)t)) * ((uint64_t)nb_clk * 1000)) / 1000000000ULL; ++ ++ /* 2.10.5.8.5 (11) */ ++ n -= 1; ++ ++ /* 2.10.5.8.5 (12) */ ++ dword = Get_NB32_DCT_NBPstate(dev, dct, nb_pstate, 0x210); ++ dword &= ~(0x3ff << 22); ++ dword |= (((n - 1) & 0x3ff) << 22); ++ Set_NB32_DCT_NBPstate(dev, dct, nb_pstate, 0x210, dword); ++ ++ /* Save result for later use */ ++ pDCTstat->CH_MaxRdLat[dct] = n; ++ } ++} ++ ++static void start_dram_dqs_training_pattern_fam15(struct MCTStatStruc *pMCTstat, ++ struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t Receiver) ++{ 
++ uint32_t dword; ++ uint32_t dev = pDCTstat->dev_dct; ++ ++ /* 2.10.5.7.1.1 ++ * It appears that the DCT only supports 8-beat burst length mode, ++ * so do nothing here... ++ */ ++ ++ /* Wait for CmdSendInProg == 0 */ ++ do { ++ dword = Get_NB32_DCT(dev, dct, 0x250); ++ } while (dword & (0x1 << 12)); ++ ++ /* Set CmdTestEnable = 1 */ ++ dword = Get_NB32_DCT(dev, dct, 0x250); ++ dword |= (0x1 << 2); ++ Set_NB32_DCT(dev, dct, 0x250, dword); ++ ++ /* 2.10.5.8.6.1.1 Send Activate Command (Target A) */ ++ dword = Get_NB32_DCT(dev, dct, 0x28c); ++ dword &= ~(0xff << 22); /* CmdChipSelect = Receiver */ ++ dword |= ((0x1 << Receiver) << 22); ++ dword &= ~(0x7 << 19); /* CmdBank = 0 */ ++ dword &= ~(0x3ffff); /* CmdAddress = 0 */ ++ dword |= (0x1 << 31); /* SendActCmd = 1 */ ++ Set_NB32_DCT(dev, dct, 0x28c, dword); ++ ++ /* Wait for SendActCmd == 0 */ ++ do { ++ dword = Get_NB32_DCT(dev, dct, 0x28c); ++ } while (dword & (0x1 << 31)); ++ ++ /* Wait 75 MEMCLKs. */ ++ precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 75); ++ ++ /* 2.10.5.8.6.1.1 Send Activate Command (Target B) */ ++ dword = Get_NB32_DCT(dev, dct, 0x28c); ++ dword &= ~(0xff << 22); /* CmdChipSelect = Receiver */ ++ dword |= ((0x1 << Receiver) << 22); ++ dword &= ~(0x7 << 19); /* CmdBank = 1 */ ++ dword |= (0x1 << 19); ++ dword &= ~(0x3ffff); /* CmdAddress = 0 */ ++ dword |= (0x1 << 31); /* SendActCmd = 1 */ ++ Set_NB32_DCT(dev, dct, 0x28c, dword); ++ ++ /* Wait for SendActCmd == 0 */ ++ do { ++ dword = Get_NB32_DCT(dev, dct, 0x28c); ++ } while (dword & (0x1 << 31)); ++ ++ /* Wait 75 MEMCLKs. */ ++ precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 75); ++} ++ ++static void stop_dram_dqs_training_pattern_fam15(struct MCTStatStruc *pMCTstat, ++ struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t Receiver) ++{ ++ uint32_t dword; ++ uint32_t dev = pDCTstat->dev_dct; ++ ++ /* 2.10.5.8.6.1.1 Send Precharge Command */ ++ /* Wait 25 MEMCLKs. */ ++ precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 25); ++ ++ dword = Get_NB32_DCT(dev, dct, 0x28c); ++ dword &= ~(0xff << 22); /* CmdChipSelect = Receiver */ ++ dword |= ((0x1 << Receiver) << 22); ++ dword &= ~(0x7 << 19); /* CmdBank = 0 */ ++ dword &= ~(0x3ffff); /* CmdAddress = 0x400 */ ++ dword |= 0x400; ++ dword |= (0x1 << 30); /* SendPchgCmd = 1 */ ++ Set_NB32_DCT(dev, dct, 0x28c, dword); ++ ++ /* Wait for SendPchgCmd == 0 */ ++ do { ++ dword = Get_NB32_DCT(dev, dct, 0x28c); ++ } while (dword & (0x1 << 30)); ++ ++ /* Wait 25 MEMCLKs. 
*/ ++ precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 25); ++ ++ /* Set CmdTestEnable = 0 */ ++ dword = Get_NB32_DCT(dev, dct, 0x250); ++ dword &= ~(0x1 << 2); ++ Set_NB32_DCT(dev, dct, 0x250, dword); ++} ++ ++static void read_dram_dqs_training_pattern_fam15(struct MCTStatStruc *pMCTstat, ++ struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t Receiver, uint8_t lane) ++{ ++ uint32_t dword; ++ uint32_t dev = pDCTstat->dev_dct; ++ ++ start_dram_dqs_training_pattern_fam15(pMCTstat, pDCTstat, dct, Receiver); ++ ++ /* 2.10.5.8.6.1.2 */ ++ /* Configure DQMask */ ++ if (lane < 4) { ++ Set_NB32_DCT(dev, dct, 0x274, ~(0xff << (lane * 8))); ++ Set_NB32_DCT(dev, dct, 0x278, ~0x0); ++ } else if (lane < 8) { ++ Set_NB32_DCT(dev, dct, 0x274, ~0x0); ++ Set_NB32_DCT(dev, dct, 0x278, ~(0xff << (lane * 8))); ++ } else { ++ Set_NB32_DCT(dev, dct, 0x274, ~0x0); ++ Set_NB32_DCT(dev, dct, 0x278, ~0x0); ++ } ++ ++ dword = Get_NB32_DCT(dev, dct, 0x27c); ++ dword &= ~(0xff); /* EccMask = 0 */ ++ if ((lane != 8) || (pDCTstat->DimmECCPresent == 0)) ++ dword |= 0xff; /* EccMask = 0xff */ ++ Set_NB32_DCT(dev, dct, 0x27c, dword); ++ ++ dword = Get_NB32_DCT(dev, dct, 0x270); ++ dword &= ~(0x7ffff); /* DataPrbsSeed = 55555 */ ++// dword |= (0x55555); ++ dword |= (0x44443); /* Use AGESA seed */ ++ Set_NB32_DCT(dev, dct, 0x270, dword); ++ ++ /* 2.10.5.8.4 */ ++ dword = Get_NB32_DCT(dev, dct, 0x260); ++ dword &= ~(0x1fffff); /* CmdCount = 256 */ ++ dword |= 256; ++ Set_NB32_DCT(dev, dct, 0x260, dword); ++ ++ /* Configure Target A */ ++ dword = Get_NB32_DCT(dev, dct, 0x254); ++ dword &= ~(0x7 << 24); /* TgtChipSelect = Receiver */ ++ dword |= (Receiver & 0x7) << 24; ++ dword &= ~(0x7 << 21); /* TgtBank = 0 */ ++ dword &= ~(0x3ff); /* TgtAddress = 0 */ ++ Set_NB32_DCT(dev, dct, 0x254, dword); ++ ++ /* Configure Target B */ ++ dword = Get_NB32_DCT(dev, dct, 0x258); ++ dword &= ~(0x7 << 24); /* TgtChipSelect = Receiver */ ++ dword |= (Receiver & 0x7) << 24; ++ dword &= ~(0x7 << 21); /* TgtBank = 1 */ ++ dword |= (0x1 << 21); ++ dword &= ~(0x3ff); /* TgtAddress = 0 */ ++ Set_NB32_DCT(dev, dct, 0x258, dword); ++ ++ dword = Get_NB32_DCT(dev, dct, 0x250); ++ dword |= (0x1 << 3); /* ResetAllErr = 1 */ ++ dword &= ~(0x1 << 4); /* StopOnErr = 0 */ ++ dword &= ~(0x3 << 8); /* CmdTgt = 1 (Alternate between Target A and Target B) */ ++ dword |= (0x1 << 8); ++ dword &= ~(0x7 << 5); /* CmdType = 0 (Read) */ ++ dword |= (0x1 << 11); /* SendCmd = 1 */ ++ Set_NB32_DCT(dev, dct, 0x250, dword); ++ ++ /* 2.10.5.8.6.1.2 Wait for TestStatus == 1 and CmdSendInProg == 0 */ ++ do { ++ dword = Get_NB32_DCT(dev, dct, 0x250); ++ } while ((dword & (0x1 << 12)) || (!(dword & (0x1 << 10)))); ++ ++ dword = Get_NB32_DCT(dev, dct, 0x250); ++ dword &= ~(0x1 << 11); /* SendCmd = 0 */ ++ Set_NB32_DCT(dev, dct, 0x250, dword); ++ ++ stop_dram_dqs_training_pattern_fam15(pMCTstat, pDCTstat, dct, Receiver); ++} ++ ++static void write_dram_dqs_training_pattern_fam15(struct MCTStatStruc *pMCTstat, ++ struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t Receiver, uint8_t lane) ++{ ++ uint32_t dword; ++ uint32_t dev = pDCTstat->dev_dct; ++ ++ start_dram_dqs_training_pattern_fam15(pMCTstat, pDCTstat, dct, Receiver); ++ ++ /* 2.10.5.8.6.1.2 */ ++ /* Configure DQMask */ ++ if (lane < 4) { ++ Set_NB32_DCT(dev, dct, 0x274, ~(0xff << (lane * 8))); ++ Set_NB32_DCT(dev, dct, 0x278, ~0x0); ++ } else if (lane < 8) { ++ Set_NB32_DCT(dev, dct, 0x274, ~0x0); ++ Set_NB32_DCT(dev, dct, 0x278, ~(0xff << (lane * 8))); ++ } else { ++ Set_NB32_DCT(dev, dct, 0x274, ~0x0); ++ 
Set_NB32_DCT(dev, dct, 0x278, ~0x0); ++ } ++ ++ dword = Get_NB32_DCT(dev, dct, 0x27c); ++ dword &= ~(0xff); /* EccMask = 0 */ ++ if ((lane != 8) || (pDCTstat->DimmECCPresent == 0)) ++ dword |= 0xff; /* EccMask = 0xff */ ++ Set_NB32_DCT(dev, dct, 0x27c, dword); ++ ++ dword = Get_NB32_DCT(dev, dct, 0x270); ++ dword &= ~(0x7ffff); /* DataPrbsSeed = 55555 */ ++// dword |= (0x55555); ++ dword |= (0x44443); /* Use AGESA seed */ ++ Set_NB32_DCT(dev, dct, 0x270, dword); ++ ++ /* 2.10.5.8.4 */ ++ dword = Get_NB32_DCT(dev, dct, 0x260); ++ dword &= ~(0x1fffff); /* CmdCount = 256 */ ++ dword |= 256; ++ Set_NB32_DCT(dev, dct, 0x260, dword); ++ ++ /* Configure Target A */ ++ dword = Get_NB32_DCT(dev, dct, 0x254); ++ dword &= ~(0x7 << 24); /* TgtChipSelect = Receiver */ ++ dword |= (Receiver & 0x7) << 24; ++ dword &= ~(0x7 << 21); /* TgtBank = 0 */ ++ dword &= ~(0x3ff); /* TgtAddress = 0 */ ++ Set_NB32_DCT(dev, dct, 0x254, dword); ++ ++ /* Configure Target B */ ++ dword = Get_NB32_DCT(dev, dct, 0x258); ++ dword &= ~(0x7 << 24); /* TgtChipSelect = Receiver */ ++ dword |= (Receiver & 0x7) << 24; ++ dword &= ~(0x7 << 21); /* TgtBank = 1 */ ++ dword |= (0x1 << 21); ++ dword &= ~(0x3ff); /* TgtAddress = 0 */ ++ Set_NB32_DCT(dev, dct, 0x258, dword); ++ ++ dword = Get_NB32_DCT(dev, dct, 0x250); ++ dword |= (0x1 << 3); /* ResetAllErr = 1 */ ++ dword &= ~(0x1 << 4); /* StopOnErr = 0 */ ++ dword &= ~(0x3 << 8); /* CmdTgt = 1 (Alternate between Target A and Target B) */ ++ dword |= (0x1 << 8); ++ dword &= ~(0x7 << 5); /* CmdType = 1 (Write) */ ++ dword |= (0x1 << 5); ++ dword |= (0x1 << 11); /* SendCmd = 1 */ ++ Set_NB32_DCT(dev, dct, 0x250, dword); ++ ++ /* 2.10.5.8.6.1.2 Wait for TestStatus == 1 and CmdSendInProg == 0 */ ++ do { ++ dword = Get_NB32_DCT(dev, dct, 0x250); ++ } while ((dword & (0x1 << 12)) || (!(dword & (0x1 << 10)))); ++ ++ dword = Get_NB32_DCT(dev, dct, 0x250); ++ dword &= ~(0x1 << 11); /* SendCmd = 0 */ ++ Set_NB32_DCT(dev, dct, 0x250, dword); ++ ++ stop_dram_dqs_training_pattern_fam15(pMCTstat, pDCTstat, dct, Receiver); ++} ++ ++/* DQS Position Training ++ * Algorithm detailed in the Fam15h BKDG Rev. 3.14 section 2.10.5.8.4 ++ */ ++static uint8_t TrainDQSRdWrPos_D_Fam15(struct MCTStatStruc *pMCTstat, ++ struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t receiver_start, uint8_t receiver_end, uint8_t lane_start, uint8_t lane_end) ++{ ++ uint8_t dimm; ++ uint8_t lane; ++ uint32_t dword; ++ uint32_t Errors; ++ uint8_t Receiver; ++ uint8_t dual_rank; ++ uint8_t write_iter; ++ uint8_t read_iter; ++ uint16_t initial_write_dqs_delay[MAX_BYTE_LANES]; ++ uint16_t initial_read_dqs_delay[MAX_BYTE_LANES]; ++ uint16_t initial_write_data_timing[MAX_BYTE_LANES]; ++ uint16_t current_write_data_delay[MAX_BYTE_LANES]; ++ uint16_t current_read_dqs_delay[MAX_BYTE_LANES]; ++ uint16_t current_write_dqs_delay[MAX_BYTE_LANES]; ++ uint8_t passing_dqs_delay_found[MAX_BYTE_LANES]; ++ uint8_t dqs_results_array[2][(lane_end - lane_start)][32][32]; /* [rank][lane][write step][read step] */ ++ ++ uint8_t last_pos = 0; ++ uint8_t cur_count = 0; ++ uint8_t best_pos = 0; ++ uint8_t best_count = 0; ++ ++ uint32_t index_reg = 0x98; ++ uint32_t dev = pDCTstat->dev_dct; ++ ++ /* Calculate and program MaxRdLatency */ ++ Calc_SetMaxRdLatency_D_Fam15(pMCTstat, pDCTstat, dct); ++ ++ Errors = 0; ++ dual_rank = 0; ++ Receiver = mct_InitReceiver_D(pDCTstat, dct); ++ if (receiver_start > Receiver) ++ Receiver = receiver_start; ++ ++ /* There are four receiver pairs, loosely associated with chipselects. 
++ * This is essentially looping over each DIMM. ++ */ ++ for (; Receiver < receiver_end; Receiver += 2) { ++ dimm = (Receiver >> 1); ++ if ((Receiver & 0x1) == 0) { ++ /* Even rank of DIMM */ ++ if(mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, dct, Receiver+1)) ++ dual_rank = 1; ++ else ++ dual_rank = 0; ++ } ++ ++ if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, dct, Receiver)) { ++ continue; ++ } ++ ++ /* Initialize variables */ ++ for (lane = lane_start; lane < lane_end; lane++) { ++ passing_dqs_delay_found[lane] = 0; ++ } ++ memset(dqs_results_array, 0, sizeof(dqs_results_array)); ++ ++ /* Read initial read / write DQS delays */ ++ read_dqs_write_timing_control_registers(initial_write_dqs_delay, dev, dct, dimm, index_reg); ++ read_dqs_read_data_timing_registers(initial_read_dqs_delay, dev, dct, dimm, index_reg); ++ ++ /* Read current settings of other (previously trained) lanes */ ++ read_dqs_write_data_timing_registers(initial_write_data_timing, dev, dct, dimm, index_reg); ++ memcpy(current_write_data_delay, initial_write_data_timing, sizeof(current_write_data_delay)); ++ ++ for (lane = lane_start; lane < lane_end; lane++) { ++ /* 2.10.5.8.4 (2) ++ * For each Write Data Delay value from Write DQS Delay to Write DQS Delay + 1 UI ++ */ ++ for (current_write_data_delay[lane] = initial_write_dqs_delay[lane]; current_write_data_delay[lane] < (initial_write_dqs_delay[lane] + 0x20); current_write_data_delay[lane]++) { ++ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 16 current_write_data_delay[lane] ", current_write_data_delay[lane], 6); ++ ++ /* 2.10.5.8.4 (2 A) ++ * Commit the current Write Data Timing settings to the hardware registers ++ */ ++ write_dqs_write_data_timing_registers(current_write_data_delay, dev, dct, dimm, index_reg); ++ ++ /* 2.10.5.8.4 (2 B) ++ * Write the DRAM training pattern to the test address ++ */ ++ write_dram_dqs_training_pattern_fam15(pMCTstat, pDCTstat, dct, Receiver, lane); ++ ++ /* Read current settings of other (previously trained) lanes */ ++ read_dqs_read_data_timing_registers(current_read_dqs_delay, dev, dct, dimm, index_reg); ++ ++ /* 2.10.5.8.4 (2 C) ++ * For each Read DQS Delay value from 0 to 1 UI ++ */ ++ for (current_read_dqs_delay[lane] = 0; current_read_dqs_delay[lane] < 0x40; current_read_dqs_delay[lane] += 2) { ++ print_debug_dqs("\t\t\t\t\tTrainDQSRdWrPos: 161 current_read_dqs_delay[lane] ", current_read_dqs_delay[lane], 6); ++ ++ /* 2.10.5.8.4 (2 A i) ++ * Commit the current Read DQS Timing Control settings to the hardware registers ++ */ ++ write_dqs_read_data_timing_registers(current_read_dqs_delay, dev, dct, dimm, index_reg); ++ ++ /* 2.10.5.8.4 (2 A ii) ++ * Read the DRAM training pattern from the test address ++ */ ++ read_dram_dqs_training_pattern_fam15(pMCTstat, pDCTstat, dct, Receiver, lane); ++ ++ /* 2.10.5.8.4 (2 A iii) ++ * Record pass / fail status ++ */ ++ dword = Get_NB32_DCT(dev, dct, 0x268) & 0x3ffff; ++ if (dword & (0x3 << (lane * 2))) ++ dqs_results_array[Receiver & 0x1][lane - lane_start][current_write_data_delay[lane] - initial_write_dqs_delay[lane]][current_read_dqs_delay[lane] >> 1] = 0; /* Fail */ ++ else ++ dqs_results_array[Receiver & 0x1][lane - lane_start][current_write_data_delay[lane] - initial_write_dqs_delay[lane]][current_read_dqs_delay[lane] >> 1] = 1; /* Pass */ ++ } ++ } ++ ++ if (dual_rank && (Receiver & 0x1)) { ++ /* Overlay the previous rank test results with the current rank */ ++ for (write_iter = 0; write_iter < 32; write_iter++) { ++ for (read_iter = 0; read_iter < 32; read_iter++) { ++ if 
((dqs_results_array[0][lane - lane_start][write_iter][read_iter]) ++ && (dqs_results_array[1][lane - lane_start][write_iter][read_iter])) ++ dqs_results_array[1][lane - lane_start][write_iter][read_iter] = 1; ++ else ++ dqs_results_array[1][lane - lane_start][write_iter][read_iter] = 0; ++ } ++ } ++ } ++ ++ /* Determine location and length of longest consecutive string of read passing values ++ * Output is stored in best_pos and best_count ++ */ ++ last_pos = 0; ++ cur_count = 0; ++ best_pos = 0; ++ best_count = 0; ++ for (write_iter = 0; write_iter < 32; write_iter++) { ++ for (read_iter = 0; read_iter < 32; read_iter++) { ++ if ((dqs_results_array[Receiver & 0x1][lane - lane_start][write_iter][read_iter]) && (read_iter < 31)) { ++ /* Pass */ ++ cur_count++; ++ } else { ++ /* Failure or end of loop */ ++ if (cur_count > best_count) { ++ best_count = cur_count; ++ best_pos = last_pos; ++ } ++ cur_count = 0; ++ last_pos = read_iter; ++ } ++ } ++ last_pos = 0; ++ } ++ ++ if (best_count > 2) { ++ /* Restore current settings of other (previously trained) lanes to the active array */ ++ memcpy(current_read_dqs_delay, initial_read_dqs_delay, sizeof(current_read_dqs_delay)); ++ ++ /* Program the Read DQS Timing Control register with the center of the passing window */ ++ current_read_dqs_delay[lane] = ((best_pos << 1) + ((best_count << 1) / 2)); ++ passing_dqs_delay_found[lane] = 1; ++ ++ /* Commit the current Read DQS Timing Control settings to the hardware registers */ ++ write_dqs_read_data_timing_registers(current_read_dqs_delay, dev, dct, dimm, index_reg); ++ ++ /* Save the final Read DQS Timing Control settings for later use */ ++ pDCTstat->CH_D_DIR_B_DQS[dct][Receiver >> 1][DQS_READDIR][lane] = current_read_dqs_delay[lane]; ++ ++ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 142 largest read passing region ", best_count, 4); ++ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 143 largest read passing region start ", best_pos, 4); ++ } else { ++ /* Reprogram the Read DQS Timing Control register with the original settings */ ++ write_dqs_read_data_timing_registers(initial_read_dqs_delay, dev, dct, dimm, index_reg); ++ } ++ ++ /* Determine location and length of longest consecutive string of write passing values ++ * Output is stored in best_pos and best_count ++ */ ++ last_pos = 0; ++ cur_count = 0; ++ best_pos = 0; ++ best_count = 0; ++ for (read_iter = 0; read_iter < 32; read_iter++) { ++ for (write_iter = 0; write_iter < 32; write_iter++) { ++ if ((dqs_results_array[Receiver & 0x1][lane - lane_start][write_iter][read_iter]) && (write_iter < 31)) { ++ /* Pass */ ++ cur_count++; ++ } else { ++ /* Failure or end of loop */ ++ if (cur_count > best_count) { ++ best_count = cur_count; ++ best_pos = last_pos; ++ } ++ cur_count = 0; ++ last_pos = write_iter; ++ } ++ } ++ last_pos = 0; ++ } ++ ++ if (best_count > 2) { ++ /* Restore current settings of other (previously trained) lanes to the active array */ ++ memcpy(current_write_dqs_delay, initial_write_data_timing, sizeof(current_write_data_delay)); ++ ++ /* Program the Write DQS Timing Control register with the optimal region within the passing window */ ++ if (pDCTstat->Status & (1 << SB_LoadReduced)) ++ current_write_dqs_delay[lane] = ((best_pos + initial_write_dqs_delay[lane]) + (best_count / 3)); ++ else ++ current_write_dqs_delay[lane] = ((best_pos + initial_write_dqs_delay[lane]) + (best_count / 2)); ++ passing_dqs_delay_found[lane] = 1; ++ ++ /* Commit the current Write DQS Timing Control settings to the hardware registers */ ++ 
write_dqs_write_data_timing_registers(current_write_dqs_delay, dev, dct, dimm, index_reg); ++ ++ /* Save the final Write Data Timing settings for later use */ ++ pDCTstat->CH_D_DIR_B_DQS[dct][Receiver >> 1][DQS_WRITEDIR][lane] = current_write_dqs_delay[lane]; ++ ++ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 144 largest write passing region ", best_count, 4); ++ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 145 largest write passing region start ", best_pos, 4); ++ } else { ++ /* Reprogram the Write DQS Timing Control register with the original settings */ ++ write_dqs_write_data_timing_registers(current_write_dqs_delay, dev, dct, dimm, index_reg); ++ } ++ } ++ ++#ifdef PRINT_PASS_FAIL_BITMAPS ++ for (lane = lane_start; lane < lane_end; lane++) { ++ for (read_iter = 0; read_iter < 32; read_iter++) { ++ for (write_iter = 0; write_iter < 32; write_iter++) { ++ if (dqs_results_array[Receiver & 0x1][lane - lane_start][write_iter][read_iter]) ++ printk(BIOS_DEBUG, "+"); ++ else ++ printk(BIOS_DEBUG, "."); ++ } ++ printk(BIOS_DEBUG, "\n"); ++ } ++ printk(BIOS_DEBUG, "\n\n"); ++ } ++#endif ++ ++ /* Flag failure(s) if present */ ++ for (lane = lane_start; lane < lane_end; lane++) { ++ if (!passing_dqs_delay_found[lane]) { ++ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 121 Unable to find passing region for lane ", lane, 2); ++ ++ /* Flag absence of passing window */ ++ Errors |= 1 << SB_NODQSPOS; ++ } ++ } ++ ++ pDCTstat->TrainErrors |= Errors; ++ pDCTstat->ErrStatus |= Errors; ++ ++#if DQS_TRAIN_DEBUG > 0 ++ { ++ u8 val; ++ u8 i; ++ u8 ChannelDTD, ReceiverDTD, Dir; ++ u8 *p; ++ ++ for (Dir = 0; Dir < 2; Dir++) { ++ if (Dir == 1) { ++ printk(BIOS_DEBUG, "TrainDQSRdWrPos: CH_D_DIR_B_DQS WR:\n"); ++ } else { ++ printk(BIOS_DEBUG, "TrainDQSRdWrPos: CH_D_DIR_B_DQS RD:\n"); ++ } ++ for (ChannelDTD = 0; ChannelDTD < 2; ChannelDTD++) { ++ printk(BIOS_DEBUG, "Channel: %02x\n", ChannelDTD); ++ for (ReceiverDTD = 0; ReceiverDTD < MAX_CS_SUPPORTED; ReceiverDTD += 2) { ++ printk(BIOS_DEBUG, "\t\tReceiver: %02x:", ReceiverDTD); ++ p = pDCTstat->CH_D_DIR_B_DQS[ChannelDTD][ReceiverDTD >> 1][Dir]; ++ for (i=0;i<8; i++) { ++ val = p[i]; ++ printk(BIOS_DEBUG, " %02x", val); ++ } ++ printk(BIOS_DEBUG, "\n"); ++ } ++ } ++ } ++ ++ } ++#endif ++ } ++ ++ /* Return 1 on success, 0 on failure */ ++ return !Errors; ++} ++ ++/* DQS Receiver Enable Cycle Training ++ * Algorithm detailed in the Fam15h BKDG Rev. 3.14 section 2.10.5.8.3 ++ */ ++static void TrainDQSReceiverEnCyc_D_Fam15(struct MCTStatStruc *pMCTstat, ++ struct DCTStatStruc *pDCTstat) ++{ ++ u32 Errors; ++ u8 Receiver; ++ u8 _DisableDramECC = 0; ++ u8 _Wrap32Dis = 0, _SSE2 = 0; ++ ++ u32 addr; ++ u32 cr4; ++ u32 lo, hi; ++ ++ uint8_t dct; ++ uint8_t prev; ++ uint8_t dimm; ++ uint8_t lane; ++ uint32_t dword; ++ uint32_t rx_en_offset; ++ uint16_t initial_phy_phase_delay[MAX_BYTE_LANES]; ++ uint16_t current_phy_phase_delay[MAX_BYTE_LANES]; ++ uint8_t dqs_results_array[1024]; ++ ++ uint16_t ren_step = 0x40; ++ uint32_t index_reg = 0x98; ++ uint32_t dev = pDCTstat->dev_dct; ++ ++ print_debug_dqs("\nTrainDQSReceiverEnCyc: Node_ID ", pDCTstat->Node_ID, 0); ++ cr4 = read_cr4(); ++ if (cr4 & (1<<9)) { ++ _SSE2 = 1; ++ } ++ cr4 |= (1<<9); /* OSFXSR enable SSE2 */ ++ write_cr4(cr4); ++ ++ addr = HWCR; ++ _RDMSR(addr, &lo, &hi); ++ if (lo & (1<<17)) { ++ _Wrap32Dis = 1; ++ } ++ lo |= (1<<17); /* HWCR.wrap32dis */ ++ _WRMSR(addr, lo, hi); /* allow 64-bit memory references in real mode */ ++ ++ /* Disable ECC correction of reads on the dram bus. 
*/ ++ _DisableDramECC = mct_DisableDimmEccEn_D(pMCTstat, pDCTstat); ++ ++ Errors = 0; ++ ++ for (dct = 0; dct < 2; dct++) { ++ /* Program D18F2x9C_x0D0F_E003_dct[1:0][DisAutoComp, DisablePredriverCal] */ ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fe003); ++ dword &= ~(0x3 << 13); ++ dword |= (0x1 << 13); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fe003, dword); ++ } ++ ++ for (dct = 0; dct < 2; dct++) { ++ /* 2.10.5.6 */ ++ fam15EnableTrainingMode(pMCTstat, pDCTstat, dct, 1); ++ ++ /* 2.10.5.8.3 */ ++ Receiver = mct_InitReceiver_D(pDCTstat, dct); ++ ++ /* There are four receiver pairs, loosely associated with chipselects. ++ * This is essentially looping over each DIMM. ++ */ ++ for (; Receiver < 8; Receiver += 2) { ++ dimm = (Receiver >> 1); ++ ++ if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, dct, Receiver)) { ++ continue; ++ } ++ ++ /* 2.10.5.8.3 (2) */ ++ read_dqs_receiver_enable_control_registers(initial_phy_phase_delay, dev, dct, dimm, index_reg); ++ ++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) { ++ /* Initialize variables */ ++ memset(dqs_results_array, 0, sizeof(dqs_results_array)); ++ ++ /* 2.10.5.8.3 (1) */ ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0030 | (lane << 8)); ++ dword |= (0x1 << 8); /* BlockRxDqsLock = 1 */ ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0030 | (lane << 8), dword); ++ ++ /* 2.10.5.8.3 (3) */ ++ rx_en_offset = (initial_phy_phase_delay[lane] + 0x10) % 0x40; ++ ++ /* 2.10.5.8.3 (4) */ ++ for (current_phy_phase_delay[lane] = rx_en_offset; current_phy_phase_delay[lane] < 0x3ff; current_phy_phase_delay[lane] += ren_step) { ++ /* 2.10.5.8.3 (4 A) */ ++ write_dqs_receiver_enable_control_registers(current_phy_phase_delay, dev, dct, dimm, index_reg); ++ ++ /* Calculate and program MaxRdLatency */ ++ Calc_SetMaxRdLatency_D_Fam15(pMCTstat, pDCTstat, dct); ++ ++ /* 2.10.5.8.3 (4 B) */ ++ dqs_results_array[current_phy_phase_delay[lane]] = TrainDQSRdWrPos_D_Fam15(pMCTstat, pDCTstat, dct, Receiver, Receiver + 2, lane, lane + 1); ++ } ++ ++#ifdef PRINT_PASS_FAIL_BITMAPS ++ uint16_t iter; ++ for (iter = 0; iter < 0x3ff; iter++) { ++ if (dqs_results_array[iter]) ++ printk(BIOS_DEBUG, "+"); ++ else ++ printk(BIOS_DEBUG, "."); ++ } ++ printk(BIOS_DEBUG, "\n"); ++#endif ++ ++ /* 2.10.5.8.3 (5) */ ++ prev = 0; ++ for (current_phy_phase_delay[lane] = rx_en_offset; current_phy_phase_delay[lane] < 0x3ff; current_phy_phase_delay[lane] += ren_step) { ++ if ((dqs_results_array[current_phy_phase_delay[lane]] == 0) && (prev == 1)) { ++ /* Restore last known good delay */ ++ current_phy_phase_delay[lane] -= ren_step; ++ ++ /* 2.10.5.8.3 (5 A B) */ ++ current_phy_phase_delay[lane] -= 0x10; ++ ++ /* Update hardware registers with final values */ ++ write_dqs_receiver_enable_control_registers(current_phy_phase_delay, dev, dct, dimm, index_reg); ++ break; ++ } ++ prev = dqs_results_array[current_phy_phase_delay[lane]]; ++ } ++ ++ /* 2.10.5.8.3 (6) */ ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0030 | (lane << 8)); ++ dword &= ~(0x1 << 8); /* BlockRxDqsLock = 0 */ ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0030 | (lane << 8), dword); ++ } ++ ++#if DQS_TRAIN_DEBUG > 0 ++ printk(BIOS_DEBUG, "TrainDQSReceiverEnCyc_D_Fam15 DQS receiver enable timing: "); ++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) { ++ printk(BIOS_DEBUG, " %03x", current_phy_phase_delay[lane]); ++ } ++ printk(BIOS_DEBUG, "\n"); ++#endif ++ } ++ } ++ ++ pDCTstat->TrainErrors |= Errors; ++ pDCTstat->ErrStatus |= Errors; ++ ++#if 
DQS_TRAIN_DEBUG > 0 ++ { ++ u8 val; ++ u8 i; ++ u8 ChannelDTD, ReceiverDTD, Dir; ++ u8 *p; ++ ++ for (Dir = 0; Dir < 2; Dir++) { ++ if (Dir == 1) { ++ printk(BIOS_DEBUG, "TrainDQSRdWrPos: CH_D_DIR_B_DQS WR:\n"); ++ } else { ++ printk(BIOS_DEBUG, "TrainDQSRdWrPos: CH_D_DIR_B_DQS RD:\n"); ++ } ++ for (ChannelDTD = 0; ChannelDTD < 2; ChannelDTD++) { ++ printk(BIOS_DEBUG, "Channel: %02x\n", ChannelDTD); ++ for (ReceiverDTD = 0; ReceiverDTD < MAX_CS_SUPPORTED; ReceiverDTD += 2) { ++ printk(BIOS_DEBUG, "\t\tReceiver: %02x:", ReceiverDTD); ++ p = pDCTstat->CH_D_DIR_B_DQS[ChannelDTD][ReceiverDTD >> 1][Dir]; ++ for (i=0;i<8; i++) { ++ val = p[i]; ++ printk(BIOS_DEBUG, " %02x", val); ++ } ++ printk(BIOS_DEBUG, "\n"); ++ } ++ } ++ } ++ ++ } ++#endif ++ if (_DisableDramECC) { ++ mct_EnableDimmEccEn_D(pMCTstat, pDCTstat, _DisableDramECC); ++ } ++ if (!_Wrap32Dis) { ++ addr = HWCR; ++ _RDMSR(addr, &lo, &hi); ++ lo &= ~(1<<17); /* restore HWCR.wrap32dis */ ++ _WRMSR(addr, lo, hi); ++ } ++ if (!_SSE2){ ++ cr4 = read_cr4(); ++ cr4 &= ~(1<<9); /* restore cr4.OSFXSR */ ++ write_cr4(cr4); ++ } ++ ++ printk(BIOS_DEBUG, "TrainDQSReceiverEnCyc: Status %x\n", pDCTstat->Status); ++ printk(BIOS_DEBUG, "TrainDQSReceiverEnCyc: TrainErrors %x\n", pDCTstat->TrainErrors); ++ printk(BIOS_DEBUG, "TrainDQSReceiverEnCyc: ErrStatus %x\n", pDCTstat->ErrStatus); ++ printk(BIOS_DEBUG, "TrainDQSReceiverEnCyc: ErrCode %x\n", pDCTstat->ErrCode); ++ printk(BIOS_DEBUG, "TrainDQSReceiverEnCyc: Done\n\n"); ++} ++ + static void SetupDqsPattern_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u32 *buffer) + { +@@ -869,18 +1785,17 @@ static u8 ChipSelPresent_D(struct MCTStatStruc *pMCTstat, + u32 val; + u32 reg; + u32 dev = pDCTstat->dev_dct; +- u32 reg_off; ++ uint8_t dct = 0; + u8 ret = 0; + +- if (!pDCTstat->GangedMode) { +- reg_off = 0x100 * Channel; +- } else { +- reg_off = 0; +- } ++ if (!pDCTstat->GangedMode) ++ dct = Channel; ++ else ++ dct = 0; + + if (ChipSel < MAX_CS_SUPPORTED){ +- reg = 0x40 + (ChipSel << 2) + reg_off; +- val = Get_NB32(dev, reg); ++ reg = 0x40 + (ChipSel << 2); ++ val = Get_NB32_DCT(dev, dct, reg); + if (val & ( 1 << 0)) + ret = 1; + } +@@ -1085,12 +2000,12 @@ u32 SetUpperFSbase(u32 addr_hi) + return addr_hi << 8; + } + +-void ResetDCTWrPtr_D(u32 dev, u32 index_reg, u32 index) ++void ResetDCTWrPtr_D(u32 dev, uint8_t dct, u32 index_reg, u32 index) + { + u32 val; + +- val = Get_NB32_index_wait(dev, index_reg, index); +- Set_NB32_index_wait(dev, index_reg, index, val); ++ val = Get_NB32_index_wait_DCT(dev, dct, index_reg, index); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, index, val); + } + + void mct_TrainDQSPos_D(struct MCTStatStruc *pMCTstat, +@@ -1103,9 +2018,13 @@ void mct_TrainDQSPos_D(struct MCTStatStruc *pMCTstat, + for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) { + pDCTstat = pDCTstatA + Node; + if (pDCTstat->DCTSysLimit) { +- TrainDQSRdWrPos_D(pMCTstat, pDCTstat); +- for (ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) { +- SetEccDQSRdWrPos_D(pMCTstat, pDCTstat, ChipSel); ++ if (is_fam15h()) { ++ TrainDQSReceiverEnCyc_D_Fam15(pMCTstat, pDCTstat); ++ } else { ++ TrainDQSRdWrPos_D_Fam10(pMCTstat, pDCTstat); ++ for (ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) { ++ SetEccDQSRdWrPos_D_Fam10(pMCTstat, pDCTstat, ChipSel); ++ } + } + } + } +@@ -1126,19 +2045,18 @@ u8 mct_DisableDimmEccEn_D(struct MCTStatStruc *pMCTstat, + + dev = pDCTstat->dev_dct; + reg = 0x90; +- val = Get_NB32(dev, reg); ++ val = Get_NB32_DCT(dev, 0, reg); + if (val & (1<<DimmEcEn)) { + 
_DisableDramECC |= 0x01; + val &= ~(1<<DimmEcEn); +- Set_NB32(dev, reg, val); ++ Set_NB32_DCT(dev, 0, reg, val); + } + if (!pDCTstat->GangedMode) { +- reg = 0x190; +- val = Get_NB32(dev, reg); ++ val = Get_NB32_DCT(dev, 1, reg); + if (val & (1<<DimmEcEn)) { + _DisableDramECC |= 0x02; + val &= ~(1<<DimmEcEn); +- Set_NB32(dev, reg, val); ++ Set_NB32_DCT(dev, 1, reg, val); + } + } + return _DisableDramECC; +@@ -1157,15 +2075,14 @@ void mct_EnableDimmEccEn_D(struct MCTStatStruc *pMCTstat, + + if ((_DisableDramECC & 0x01) == 0x01) { + reg = 0x90; +- val = Get_NB32(dev, reg); ++ val = Get_NB32_DCT(dev, 0, reg); + val |= (1<<DimmEcEn); +- Set_NB32(dev, reg, val); ++ Set_NB32_DCT(dev, 0, reg, val); + } + if ((_DisableDramECC & 0x02) == 0x02) { +- reg = 0x190; +- val = Get_NB32(dev, reg); ++ val = Get_NB32_DCT(dev, 1, reg); + val |= (1<<DimmEcEn); +- Set_NB32(dev, reg, val); ++ Set_NB32_DCT(dev, 1, reg, val); + } + } + +@@ -1177,7 +2094,7 @@ static void mct_SetDQSDelayCSR_D(struct MCTStatStruc *pMCTstat, + { + u8 ByteLane; + u32 val; +- u32 index_reg = 0x98 + 0x100 * pDCTstat->Channel; ++ u32 index_reg = 0x98; + u8 shift; + u32 dqs_delay = (u32)pDCTstat->DQSDelay; + u32 dev = pDCTstat->dev_dct; +@@ -1205,7 +2122,7 @@ static void mct_SetDQSDelayCSR_D(struct MCTStatStruc *pMCTstat, + + index += (ChipSel>>1) << 8; + +- val = Get_NB32_index_wait(dev, index_reg, index); ++ val = Get_NB32_index_wait_DCT(dev, pDCTstat->Channel, index_reg, index); + if (ByteLane < 8) { + if (pDCTstat->Direction == DQS_WRITEDIR) { + dqs_delay += pDCTstat->CH_D_B_TxDqs[pDCTstat->Channel][ChipSel>>1][ByteLane]; +@@ -1215,7 +2132,7 @@ static void mct_SetDQSDelayCSR_D(struct MCTStatStruc *pMCTstat, + } + val &= ~(0x7f << shift); + val |= (dqs_delay << shift); +- Set_NB32_index_wait(dev, index_reg, index, val); ++ Set_NB32_index_wait_DCT(dev, pDCTstat->Channel, index_reg, index, val); + } + } + +@@ -1241,7 +2158,7 @@ u32 mct_GetMCTSysAddr_D(struct MCTStatStruc *pMCTstat, + u8 Channel, u8 receiver, u8 *valid) + { + u32 val; +- u32 reg_off = 0; ++ uint8_t dct = 0; + u32 reg; + u32 dword; + u32 dev = pDCTstat->dev_dct; +@@ -1250,12 +2167,12 @@ u32 mct_GetMCTSysAddr_D(struct MCTStatStruc *pMCTstat, + + + if (!pDCTstat->GangedMode) { +- reg_off = 0x100 * Channel; ++ dct = Channel; + } + + /* get the local base addr of the chipselect */ +- reg = 0x40 + (receiver << 2) + reg_off; +- val = Get_NB32(dev, reg); ++ reg = 0x40 + (receiver << 2); ++ val = Get_NB32_DCT(dev, dct, reg); + + val &= ~0xe007c01f; + +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctecc_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mctecc_d.c +index 0c52791..11f1b2c 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mctecc_d.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctecc_d.c +@@ -2,6 +2,7 @@ + * This file is part of the coreboot project. + * + * Copyright (C) 2010 Advanced Micro Devices, Inc. 
++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -91,19 +92,21 @@ u8 ECCInit_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA) + + /* Construct these booleans, based on setup options, for easy handling + later in this procedure */ +- OB_NBECC = mctGet_NVbits(NV_NBECC); /* MCA ECC (MCE) enable bit */ ++ OB_NBECC = mctGet_NVbits(NV_NBECC); /* MCA ECC (MCE) enable bit */ + +- OB_ECCRedir = mctGet_NVbits(NV_ECCRedir); /* ECC Redirection */ ++ OB_ECCRedir = mctGet_NVbits(NV_ECCRedir); /* ECC Redirection */ + +- OB_ChipKill = mctGet_NVbits(NV_ChipKill); /* ECC Chip-kill mode */ ++ OB_ChipKill = mctGet_NVbits(NV_ChipKill); /* ECC Chip-kill mode */ ++ OF_ScrubCTL = 0; /* Scrub CTL for Dcache, L2, and dram */ + +- OF_ScrubCTL = 0; /* Scrub CTL for Dcache, L2, and dram */ +- nvbits = mctGet_NVbits(NV_DCBKScrub); +- /* mct_AdjustScrub_D(pDCTstatA, &nvbits); */ /* Need not adjust */ +- OF_ScrubCTL |= (u32) nvbits << 16; ++ if (!is_fam15h()) { ++ nvbits = mctGet_NVbits(NV_DCBKScrub); ++ /* mct_AdjustScrub_D(pDCTstatA, &nvbits); */ /* Need not adjust */ ++ OF_ScrubCTL |= (u32) nvbits << 16; + +- nvbits = mctGet_NVbits(NV_L2BKScrub); +- OF_ScrubCTL |= (u32) nvbits << 8; ++ nvbits = mctGet_NVbits(NV_L2BKScrub); ++ OF_ScrubCTL |= (u32) nvbits << 8; ++ } + + nvbits = mctGet_NVbits(NV_DramBKScrub); + OF_ScrubCTL |= nvbits; +@@ -131,7 +134,7 @@ u8 ECCInit_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA) + pDCTstat->ErrStatus |= (1 << SB_DramECCDis); + } + AllECC = 0; +- LDramECC =0; ++ LDramECC = 0; + } + } else { + AllECC = 0; +@@ -140,7 +143,7 @@ u8 ECCInit_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA) + if (OB_NBECC) { + mct_EnableDatIntlv_D(pMCTstat, pDCTstat); + dev = pDCTstat->dev_nbmisc; +- reg =0x44; /* MCA NB Configuration */ ++ reg = 0x44; /* MCA NB Configuration */ + val = Get_NB32(dev, reg); + val |= 1 << 22; /* EccEn */ + Set_NB32(dev, reg, val); +@@ -177,6 +180,10 @@ u8 ECCInit_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA) + /*WE/RE is checked because memory config may have been */ + if((val & 3)==3) { /* Node has dram populated */ + if (isDramECCEn_D(pDCTstat)) { /* if ECC is enabled on this dram */ ++ if (is_fam15h()) { ++ /* Erratum 505 */ ++ fam15h_switch_dct(pDCTstat->dev_map, 0); ++ } + dev = pDCTstat->dev_nbmisc; + val = curBase << 8; + if(OB_ECCRedir) { +@@ -187,16 +194,18 @@ u8 ECCInit_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA) + Set_NB32(dev, 0x60, val); /* Dram Scrub Addr High */ + Set_NB32(dev, 0x58, OF_ScrubCTL); /*Scrub Control */ + +- /* Divisor should not be set deeper than +- * divide by 16 when Dcache scrubber or +- * L2 scrubber is enabled. +- */ +- if ((OF_ScrubCTL & (0x1F << 16)) || (OF_ScrubCTL & (0x1F << 8))) { +- val = Get_NB32(dev, 0x84); +- if ((val & 0xE0000000) > 0x80000000) { /* Get F3x84h[31:29]ClkDivisor for C1 */ +- val &= 0x1FFFFFFF; /* If ClkDivisor is deeper than divide-by-16 */ +- val |= 0x80000000; /* set it to divide-by-16 */ +- Set_NB32(dev, 0x84, val); ++ if (!is_fam15h()) { ++ /* Divisor should not be set deeper than ++ * divide by 16 when Dcache scrubber or ++ * L2 scrubber is enabled. 
++ */ ++ if ((OF_ScrubCTL & (0x1F << 16)) || (OF_ScrubCTL & (0x1F << 8))) { ++ val = Get_NB32(dev, 0x84); ++ if ((val & 0xE0000000) > 0x80000000) { /* Get F3x84h[31:29]ClkDivisor for C1 */ ++ val &= 0x1FFFFFFF; /* If ClkDivisor is deeper than divide-by-16 */ ++ val |= 0x80000000; /* set it to divide-by-16 */ ++ Set_NB32(dev, 0x84, val); ++ } + } + } + } /* this node has ECC enabled dram */ +@@ -267,8 +276,8 @@ static u8 isDramECCEn_D(struct DCTStatStruc *pDCTstat) + } + for(i=0; i<ch_end; i++) { + if(pDCTstat->DIMMValidDCT[i] > 0){ +- reg = 0x90 + i * 0x100; /* Dram Config Low */ +- val = Get_NB32(dev, reg); ++ reg = 0x90; /* Dram Config Low */ ++ val = Get_NB32_DCT(dev, i, reg); + if(val & (1<<DimmEcEn)) { + /* set local flag 'dram ecc capable' */ + isDimmECCEn = 1; +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mcthdi.c b/src/northbridge/amd/amdmct/mct_ddr3/mcthdi.c +index 0112732..a6b9dcb 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mcthdi.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mcthdi.c +@@ -2,6 +2,7 @@ + * This file is part of the coreboot project. + * + * Copyright (C) 2010 Advanced Micro Devices, Inc. ++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -25,8 +26,8 @@ void mct_DramInit_Hw_D(struct MCTStatStruc *pMCTstat, + u32 dev = pDCTstat->dev_dct; + + /*flag for selecting HW/SW DRAM Init HW DRAM Init */ +- reg = 0x90 + 0x100 * dct; /*DRAM Configuration Low */ +- val = Get_NB32(dev, reg); ++ reg = 0x90; /*DRAM Configuration Low */ ++ val = Get_NB32_DCT(dev, dct, reg); + val |= (1<<InitDram); +- Set_NB32(dev, reg, val); ++ Set_NB32_DCT(dev, dct, reg, val); + } +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c b/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c +index 60bc01d..5e81808 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c +@@ -18,10 +18,12 @@ + * Foundation, Inc. 
+ */ + +-static void SetTargetFreq(struct MCTStatStruc *pMCTstat, +- struct DCTStatStruc *pDCTstat); +-static void AgesaHwWlPhase1(sMCTStruct *pMCTData, +- sDCTStruct *pDCTData, u8 dimm, u8 pass); ++static void AgesaHwWlPhase1(struct MCTStatStruc *pMCTstat, ++ struct DCTStatStruc *pDCTstat, u8 dct, u8 dimm, u8 pass); ++static void AgesaHwWlPhase2(struct MCTStatStruc *pMCTstat, ++ struct DCTStatStruc *pDCTstat, u8 dct, u8 dimm, u8 pass); ++static void AgesaHwWlPhase3(struct MCTStatStruc *pMCTstat, ++ struct DCTStatStruc *pDCTstat, u8 dct, u8 dimm, u8 pass); + static void EnableZQcalibration(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat); + static void DisableZQcalibration(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat); + static void PrepareC_MCT(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat); +@@ -56,7 +58,7 @@ static void SetEccWrDQS_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pD + Addl_Index = 0x32; + Addl_Index += DimmNum * 3; + +- val = Get_NB32_index_wait(pDCTstat->dev_dct, Channel * 0x100 + 0x98, Addl_Index); ++ val = Get_NB32_index_wait_DCT(pDCTstat->dev_dct, Channel, 0x98, Addl_Index); + if (OddByte) + val >>= 16; + /* Save WrDqs to stack for later usage */ +@@ -74,13 +76,13 @@ static void EnableAutoRefresh_D(struct MCTStatStruc *pMCTstat, struct DCTStatStr + { + u32 val; + +- val = Get_NB32(pDCTstat->dev_dct, 0x8C); ++ val = Get_NB32_DCT(pDCTstat->dev_dct, 0, 0x8C); + val &= ~(1 << DisAutoRefresh); +- Set_NB32(pDCTstat->dev_dct, 0x8C, val); ++ Set_NB32_DCT(pDCTstat->dev_dct, 0, 0x8C, val); + +- val = Get_NB32(pDCTstat->dev_dct, 0x8C + 0x100); ++ val = Get_NB32_DCT(pDCTstat->dev_dct, 1, 0x8C); + val &= ~(1 << DisAutoRefresh); +- Set_NB32(pDCTstat->dev_dct, 0x8C + 0x100, val); ++ Set_NB32_DCT(pDCTstat->dev_dct, 1, 0x8C, val); + } + + static void DisableAutoRefresh_D(struct MCTStatStruc *pMCTstat, +@@ -88,13 +90,13 @@ static void DisableAutoRefresh_D(struct MCTStatStruc *pMCTstat, + { + u32 val; + +- val = Get_NB32(pDCTstat->dev_dct, 0x8C); ++ val = Get_NB32_DCT(pDCTstat->dev_dct, 0, 0x8C); + val |= 1 << DisAutoRefresh; +- Set_NB32(pDCTstat->dev_dct, 0x8C, val); ++ Set_NB32_DCT(pDCTstat->dev_dct, 0, 0x8C, val); + +- val = Get_NB32(pDCTstat->dev_dct, 0x8C + 0x100); ++ val = Get_NB32_DCT(pDCTstat->dev_dct, 1, 0x8C); + val |= 1 << DisAutoRefresh; +- Set_NB32(pDCTstat->dev_dct, 0x8C + 0x100, val); ++ Set_NB32_DCT(pDCTstat->dev_dct, 1, 0x8C, val); + } + + +@@ -118,8 +120,11 @@ static void PhyWLPass1(struct MCTStatStruc *pMCTstat, + DIMMValid = pDCTstat->DIMMValid; + PrepareC_DCT(pMCTstat, pDCTstat, dct); + for (dimm = 0; dimm < MAX_DIMMS_SUPPORTED; dimm ++) { +- if (DIMMValid & (1 << (dimm << 1))) +- AgesaHwWlPhase1(pDCTstat->C_MCTPtr, DCTPtr, dimm, FirstPass); ++ if (DIMMValid & (1 << (dimm << 1))) { ++ AgesaHwWlPhase1(pMCTstat, pDCTstat, dct, dimm, FirstPass); ++ AgesaHwWlPhase2(pMCTstat, pDCTstat, dct, dimm, FirstPass); ++ AgesaHwWlPhase3(pMCTstat, pDCTstat, dct, dimm, FirstPass); ++ } + } + } + } +@@ -146,27 +151,40 @@ static void PhyWLPass2(struct MCTStatStruc *pMCTstat, + pDCTstat->Speed = pDCTstat->DIMMAutoSpeed = pDCTstat->TargetFreq; + pDCTstat->CASL = pDCTstat->DIMMCASL = pDCTstat->TargetCASL; + SPD2ndTiming(pMCTstat, pDCTstat, dct); +- ProgDramMRSReg_D(pMCTstat, pDCTstat, dct); +- PlatformSpec_D(pMCTstat, pDCTstat, dct); +- fenceDynTraining_D(pMCTstat, pDCTstat, dct); ++ if (!is_fam15h()) { ++ ProgDramMRSReg_D(pMCTstat, pDCTstat, dct); ++ PlatformSpec_D(pMCTstat, pDCTstat, dct); ++ fenceDynTraining_D(pMCTstat, pDCTstat, 
dct); ++ } + Restore_OnDimmMirror(pMCTstat, pDCTstat); + StartupDCT_D(pMCTstat, pDCTstat, dct); + Clear_OnDimmMirror(pMCTstat, pDCTstat); + SetDllSpeedUp_D(pMCTstat, pDCTstat, dct); + DisableAutoRefresh_D(pMCTstat, pDCTstat); + for (dimm = 0; dimm < MAX_DIMMS_SUPPORTED; dimm ++) { +- if (DIMMValid & (1 << (dimm << 1))) +- AgesaHwWlPhase1(pDCTstat->C_MCTPtr, pDCTstat->C_DCTPtr[dct], dimm, SecondPass); ++ if (DIMMValid & (1 << (dimm << 1))) { ++ AgesaHwWlPhase1(pMCTstat, pDCTstat, dct, dimm, SecondPass); ++ AgesaHwWlPhase2(pMCTstat, pDCTstat, dct, dimm, SecondPass); ++ AgesaHwWlPhase3(pMCTstat, pDCTstat, dct, dimm, SecondPass); ++ } + } + } + } + ++static uint16_t fam15h_next_highest_memclk_freq(uint16_t memclk_freq) ++{ ++ uint16_t fam15h_next_highest_freq_tab[] = {0, 0, 0, 0, 0x6, 0, 0xa, 0, 0, 0, 0xe, 0, 0, 0, 0x12, 0, 0, 0, 0x16, 0, 0, 0, 0x16}; ++ return fam15h_next_highest_freq_tab[memclk_freq]; ++} ++ + /* Write Levelization Training + * Algorithm detailed in the Fam10h BKDG Rev. 3.62 section 2.8.9.9.1 + */ + static void WriteLevelization_HW(struct MCTStatStruc *pMCTstat, +- struct DCTStatStruc *pDCTstat) ++ struct DCTStatStruc *pDCTstat, uint8_t Pass) + { ++ uint16_t final_target_freq; ++ + pDCTstat->C_MCTPtr = &(pDCTstat->s_C_MCTPtr); + pDCTstat->C_DCTPtr[0] = &(pDCTstat->s_C_DCTPtr[0]); + pDCTstat->C_DCTPtr[1] = &(pDCTstat->s_C_DCTPtr[1]); +@@ -182,16 +200,39 @@ static void WriteLevelization_HW(struct MCTStatStruc *pMCTstat, + pDCTstat->DIMMValidDCT[1] = pDCTstat->DIMMValidDCT[0]; + } + +- PhyWLPass1(pMCTstat, pDCTstat, 0); +- PhyWLPass1(pMCTstat, pDCTstat, 1); ++ if (Pass == FirstPass) { ++ PhyWLPass1(pMCTstat, pDCTstat, 0); ++ PhyWLPass1(pMCTstat, pDCTstat, 1); ++ } ++ ++ if (Pass == SecondPass) { ++ if (pDCTstat->TargetFreq > mhz_to_memclk_config(mctGet_NVbits(NV_MIN_MEMCLK))) { ++ /* 8.Prepare the memory subsystem for the target MEMCLK frequency. ++ * NOTE: BIOS must program both DCTs to the same frequency. ++ * NOTE: Fam15h steps the frequency, Fam10h slams the frequency. ++ */ ++ final_target_freq = pDCTstat->TargetFreq; ++ ++ while (pDCTstat->Speed != final_target_freq) { ++ if (is_fam15h()) ++ pDCTstat->TargetFreq = fam15h_next_highest_memclk_freq(pDCTstat->Speed); ++ else ++ pDCTstat->TargetFreq = final_target_freq; ++ SetTargetFreq(pMCTstat, pDCTstat); ++ PhyWLPass2(pMCTstat, pDCTstat, 0); ++ PhyWLPass2(pMCTstat, pDCTstat, 1); ++ } ++ ++ pDCTstat->TargetFreq = final_target_freq; + +- if (pDCTstat->TargetFreq > 4) { +- /* 8.Prepare the memory subsystem for the target MEMCLK frequency. +- * Note: BIOS must program both DCTs to the same frequency. 
+- */ +- SetTargetFreq(pMCTstat, pDCTstat); +- PhyWLPass2(pMCTstat, pDCTstat, 0); +- PhyWLPass2(pMCTstat, pDCTstat, 1); ++ uint8_t dct; ++ for (dct = 0; dct < 2; dct++) { ++ sDCTStruct *pDCTData = pDCTstat->C_DCTPtr[dct]; ++ memcpy(pDCTData->WLGrossDelayFinalPass, pDCTData->WLGrossDelayPrevPass, sizeof(pDCTData->WLGrossDelayPrevPass)); ++ memcpy(pDCTData->WLFineDelayFinalPass, pDCTData->WLFineDelayPrevPass, sizeof(pDCTData->WLFineDelayPrevPass)); ++ pDCTData->WLCriticalGrossDelayFinalPass = pDCTData->WLCriticalGrossDelayPrevPass; ++ } ++ } + } + + SetEccWrDQS_D(pMCTstat, pDCTstat); +@@ -200,7 +241,7 @@ static void WriteLevelization_HW(struct MCTStatStruc *pMCTstat, + } + + void mct_WriteLevelization_HW(struct MCTStatStruc *pMCTstat, +- struct DCTStatStruc *pDCTstatA) ++ struct DCTStatStruc *pDCTstatA, uint8_t Pass) + { + u8 Node; + +@@ -211,7 +252,7 @@ void mct_WriteLevelization_HW(struct MCTStatStruc *pMCTstat, + if (pDCTstat->NodePresent) { + mctSMBhub_Init(Node); + Clear_OnDimmMirror(pMCTstat, pDCTstat); +- WriteLevelization_HW(pMCTstat, pDCTstat); ++ WriteLevelization_HW(pMCTstat, pDCTstat, Pass); + Restore_OnDimmMirror(pMCTstat, pDCTstat); + } + } +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctproc.c b/src/northbridge/amd/amdmct/mct_ddr3/mctproc.c +index cda9c6b..5ef4a2c 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mctproc.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctproc.c +@@ -34,7 +34,7 @@ u32 mct_SetDramConfigMisc2(struct DCTStatStruc *pDCTstat, u8 dct, u32 misc2) + + if (pDCTstat->LogicalCPUID & AMD_DR_Cx) + misc2 |= 1 << OdtSwizzle; +- val = Get_NB32(pDCTstat->dev_dct, dct * 0x100 + 0x78); ++ val = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x78); + + val &= 7; + val = ((~val) & 0xff) + 1; +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctrci.c b/src/northbridge/amd/amdmct/mct_ddr3/mctrci.c +index bd8b7fb..5ea7fa6 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mctrci.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctrci.c +@@ -2,6 +2,7 @@ + * This file is part of the coreboot project. + * + * Copyright (C) 2010 Advanced Micro Devices, Inc. 
++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -23,7 +24,6 @@ static u32 mct_ControlRC(struct MCTStatStruc *pMCTstat, + u8 Dimms, DimmNum, MaxDimm, Speed; + u32 val; + u32 dct = 0; +- u32 reg_off = 0; + + DimmNum = (MrsChipSel >> 20) & 0xFE; + +@@ -41,7 +41,6 @@ static u32 mct_ControlRC(struct MCTStatStruc *pMCTstat, + dct = 1; + DimmNum ++; + } +- reg_off = 0x100 * dct; + Dimms = pDCTstat->MAdimms[dct]; + + val = 0; +@@ -95,21 +94,21 @@ static u32 mct_ControlRC(struct MCTStatStruc *pMCTstat, + static void mct_SendCtrlWrd(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u32 val) + { +- u32 reg_off = 0; ++ uint8_t dct = 0; + u32 dev = pDCTstat->dev_dct; + + if (pDCTstat->CSPresent_DCT[0] > 0) { +- reg_off = 0; ++ dct = 0; + } else if (pDCTstat->CSPresent_DCT[1] > 0 ){ +- reg_off = 0x100; ++ dct = 1; + } + +- val |= Get_NB32(dev, reg_off + 0x7C) & ~0xFFFFFF; ++ val |= Get_NB32_DCT(dev, dct, 0x7C) & ~0xFFFFFF; + val |= 1 << SendControlWord; +- Set_NB32(dev, reg_off + 0x7C, val); ++ Set_NB32_DCT(dev, dct, 0x7C, val); + + do { +- val = Get_NB32(dev, reg_off + 0x7C); ++ val = Get_NB32_DCT(dev, dct, 0x7C); + } while (val & (1 << SendControlWord)); + } + +@@ -119,7 +118,6 @@ void mct_DramControlReg_Init_D(struct MCTStatStruc *pMCTstat, + u8 MrsChipSel; + u32 dev = pDCTstat->dev_dct; + u32 val, cw; +- u32 reg_off = 0x100 * dct; + + mct_Wait(1600); + +@@ -127,7 +125,7 @@ void mct_DramControlReg_Init_D(struct MCTStatStruc *pMCTstat, + + for (MrsChipSel = 0; MrsChipSel < 8; MrsChipSel ++, MrsChipSel ++) { + if (pDCTstat->CSPresent & (1 << MrsChipSel)) { +- val = Get_NB32(dev, reg_off + 0xA8); ++ val = Get_NB32_DCT(dev, dct, 0xa8); + val &= ~(0xF << 8); + + switch (MrsChipSel) { +@@ -144,7 +142,7 @@ void mct_DramControlReg_Init_D(struct MCTStatStruc *pMCTstat, + case 7: + val |= (3 << 6) << 8; + } +- Set_NB32(dev, reg_off + 0xA8 , val); ++ Set_NB32_DCT(dev, dct, 0xa8, val); + + for (cw=0; cw <=15; cw ++) { + mct_Wait(1600); +@@ -171,10 +169,10 @@ void FreqChgCtrlWrd(struct MCTStatStruc *pMCTstat, + for (MrsChipSel=0; MrsChipSel < 8; MrsChipSel++, MrsChipSel++) { + if (pDCTstat->CSPresent & (1 << MrsChipSel)) { + /* 2. Program F2x[1, 0]A8[CtrlWordCS]=bit mask for target chip selects. */ +- val = Get_NB32(dev, 0xA8); /* TODO: dct * 0x100 + 0xA8 */ ++ val = Get_NB32_DCT(dev, 0, 0xA8); /* TODO: dct 0 / 1 select */ + val &= ~(0xFF << 8); + val |= (0x3 << (MrsChipSel & 0xFE)) << 8; +- Set_NB32(dev, 0xA8, val); /* TODO: dct * 0x100 + 0xA8 */ ++ Set_NB32_DCT(dev, 0, 0xA8, val); /* TODO: dct 0 / 1 select */ + + /* Resend control word 10 */ + mct_Wait(1600); +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c b/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c +index b21b96a..51cbf16 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c +@@ -18,17 +18,182 @@ + * Foundation, Inc. 
+ */ + ++static uint8_t fam15_dimm_dic(struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t dimm, uint8_t rank, uint8_t package_type) ++{ ++ uint8_t dic; ++ ++ /* Calculate DIC based on recommendations in MR1_dct[1:0] */ ++ if (pDCTstat->Status & (1 << SB_LoadReduced)) { ++ /* TODO ++ * LRDIMM unimplemented ++ */ ++ dic = 0x0; ++ } else { ++ dic = 0x1; ++ } ++ ++ return dic; ++} ++ ++static uint8_t fam15_rttwr(struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t dimm, uint8_t rank, uint8_t package_type) ++{ ++ uint8_t term = 0; ++ sDCTStruct *pDCTData = pDCTstat->C_DCTPtr[dct]; ++ uint8_t number_of_dimms = pDCTData->MaxDimmsInstalled; ++ uint8_t frequency_index; ++ uint8_t rank_count = pDCTData->DimmRanks[dimm]; ++ ++ if (is_fam15h()) ++ frequency_index = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x94) & 0x1f; ++ else ++ frequency_index = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x94) & 0x7; ++ ++ /* FIXME ++ * Mainboards need to be able to specify the maximum number of DIMMs installable per channel ++ * For now assume a maximum of 2 DIMMs per channel can be installed ++ */ ++ uint8_t MaxDimmsInstallable = 2; ++ ++ if (is_fam15h()) { ++ if (pDCTstat->Status & (1 << SB_Registered)) { ++ /* TODO ++ * RDIMM unimplemented ++ */ ++ } else { ++ if (package_type == PT_GR) { ++ /* Socket G34: Fam15h BKDG v3.14 Table 56 */ ++ if (MaxDimmsInstallable == 1) { ++ term = 0x0; ++ } else if (MaxDimmsInstallable == 2) { ++ if ((number_of_dimms == 2) && (frequency_index == 0x12)) { ++ term = 0x1; ++ } else if (number_of_dimms == 1) { ++ term = 0x0; ++ } else { ++ term = 0x2; ++ } ++ } else if (MaxDimmsInstallable == 3) { ++ if (number_of_dimms == 1) { ++ if (frequency_index <= 0xa) { ++ term = 0x2; ++ } else { ++ if (rank_count < 3) { ++ term = 0x1; ++ } else { ++ term = 0x2; ++ } ++ } ++ } else if (number_of_dimms == 2) { ++ term = 0x2; ++ } ++ } ++ } else { ++ /* TODO ++ * Other sockets unimplemented ++ */ ++ } ++ } ++ } ++ ++ return term; ++} ++ ++static uint8_t fam15_rttnom(struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t dimm, uint8_t rank, uint8_t package_type) ++{ ++ uint8_t term = 0; ++ sDCTStruct *pDCTData = pDCTstat->C_DCTPtr[dct]; ++ uint8_t number_of_dimms = pDCTData->MaxDimmsInstalled; ++ uint8_t frequency_index; ++ ++ if (is_fam15h()) ++ frequency_index = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x94) & 0x1f; ++ else ++ frequency_index = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x94) & 0x7; ++ ++ /* FIXME ++ * Mainboards need to be able to specify the maximum number of DIMMs installable per channel ++ * For now assume a maximum of 2 DIMMs per channel can be installed ++ */ ++ uint8_t MaxDimmsInstallable = 2; ++ ++ if (is_fam15h()) { ++ if (pDCTstat->Status & (1 << SB_LoadReduced)) { ++ /* TODO ++ * LRDIMM unimplemented ++ */ ++ } else if (pDCTstat->Status & (1 << SB_Registered)) { ++ /* TODO ++ * RDIMM unimplemented ++ */ ++ } else { ++ if (package_type == PT_GR) { ++ /* Socket G34: Fam15h BKDG v3.14 Table 56 */ ++ if (MaxDimmsInstallable == 1) { ++ if ((frequency_index == 0x4) || (frequency_index == 0x6)) ++ term = 0x2; ++ else if ((frequency_index == 0xa) || (frequency_index == 0xe)) ++ term = 0x1; ++ else ++ term = 0x3; ++ } ++ if (MaxDimmsInstallable == 2) { ++ if (number_of_dimms == 1) { ++ if (frequency_index <= 0x6) { ++ term = 0x2; ++ } else if (frequency_index <= 0xe) { ++ term = 0x1; ++ } else { ++ term = 0x3; ++ } ++ } else { ++ if (frequency_index <= 0xa) { ++ term = 0x3; ++ } else if (frequency_index <= 0xe) { ++ term = 0x5; ++ } else { ++ term = 0x4; ++ } ++ } ++ } else if (MaxDimmsInstallable 
== 3) { ++ if (number_of_dimms == 1) { ++ term = 0x0; ++ } else if (number_of_dimms == 2) { ++ if (frequency_index <= 0xa) { ++ if (rank == 1) { ++ term = 0x0; ++ } else { ++ term = 0x3; ++ } ++ } else if (frequency_index <= 0xe) { ++ if (rank == 1) { ++ term = 0x0; ++ } else { ++ term = 0x5; ++ } ++ } ++ } ++ } ++ } else { ++ /* TODO ++ * Other sockets unimplemented ++ */ ++ } ++ } ++ } ++ ++ return term; ++} ++ + static void mct_DramControlReg_Init_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct); + + static void mct_DCTAccessDone(struct DCTStatStruc *pDCTstat, u8 dct) + { +- u32 reg_off = 0x100 * dct; + u32 dev = pDCTstat->dev_dct; + u32 val; + + do { +- val = Get_NB32(dev, reg_off + 0x98); ++ val = Get_NB32_DCT(dev, dct, 0x98); + } while (!(val & (1 << DctAccessDone))); + } + +@@ -54,9 +219,15 @@ static u32 swapAddrBits(struct DCTStatStruc *pDCTstat, u32 MR_register_setting, + if (MR_register_setting & (1 << 6)) ret |= 1 << 5; + if (MR_register_setting & (1 << 7)) ret |= 1 << 8; + if (MR_register_setting & (1 << 8)) ret |= 1 << 7; +- if (MR_register_setting & (1 << 16)) ret |= 1 << 17; +- if (MR_register_setting & (1 << 17)) ret |= 1 << 16; +- MR_register_setting &= ~0x301f8; ++ if (is_fam15h()) { ++ if (MR_register_setting & (1 << 18)) ret |= 1 << 19; ++ if (MR_register_setting & (1 << 19)) ret |= 1 << 18; ++ MR_register_setting &= ~0x000c01f8; ++ } else { ++ if (MR_register_setting & (1 << 16)) ret |= 1 << 17; ++ if (MR_register_setting & (1 << 17)) ret |= 1 << 16; ++ MR_register_setting &= ~0x000301f8; ++ } + MR_register_setting |= ret; + } + } +@@ -65,47 +236,76 @@ static u32 swapAddrBits(struct DCTStatStruc *pDCTstat, u32 MR_register_setting, + + static void mct_SendMrsCmd(struct DCTStatStruc *pDCTstat, u8 dct, u32 EMRS) + { +- u32 reg_off = 0x100 * dct; + u32 dev = pDCTstat->dev_dct; + u32 val; + +- val = Get_NB32(dev, reg_off + 0x7C); +- val &= ~0xFFFFFF; ++ val = Get_NB32_DCT(dev, dct, 0x7c); ++ val &= ~0x00ffffff; + val |= EMRS; + val |= 1 << SendMrsCmd; +- Set_NB32(dev, reg_off + 0x7C, val); ++ Set_NB32_DCT(dev, dct, 0x7c, val); + + do { +- val = Get_NB32(dev, reg_off + 0x7C); ++ val = Get_NB32_DCT(dev, dct, 0x7c); + } while (val & (1 << SendMrsCmd)); + } + + static u32 mct_MR2(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct, u32 MrsChipSel) + { +- u32 reg_off = 0x100 * dct; + u32 dev = pDCTstat->dev_dct; + u32 dword, ret; + +- ret = 0x20000; +- ret |= MrsChipSel; ++ if (is_fam15h()) { ++ uint8_t package_type = mctGet_NVbits(NV_PACK_TYPE); ++ ++ /* The formula for chip select number is: CS = dimm*2+rank */ ++ uint8_t dimm = MrsChipSel / 2; ++ uint8_t rank = MrsChipSel % 2; + +- /* program MrsAddress[5:3]=CAS write latency (CWL): +- * based on F2x[1,0]84[Tcwl] */ +- dword = Get_NB32(dev, reg_off + 0x84); +- dword = mct_AdjustSPDTimings(pMCTstat, pDCTstat, dword); ++ /* FIXME: These parameters should be configurable ++ * For now, err on the side of caution and enable automatic 2x refresh ++ * when the DDR temperature rises above the internal limits ++ */ ++ uint8_t force_2x_self_refresh = 0; /* ASR */ ++ uint8_t auto_2x_self_refresh = 1; /* SRT */ + +- ret |= ((dword >> 20) & 7) << 3; ++ ret = 0x80000; ++ ret |= (MrsChipSel << 21); + +- /* program MrsAddress[6]=auto self refresh method (ASR): +- based on F2x[1,0]84[ASR] +- program MrsAddress[7]=self refresh temperature range (SRT): +- based on F2x[1,0]84[ASR and SRT] */ +- ret |= ((dword >> 18) & 3) << 6; ++ /* Set self refresh parameters */ ++ ret |= (force_2x_self_refresh << 6); 
++ ret |= (auto_2x_self_refresh << 7); + +- /* program MrsAddress[10:9]=dynamic termination during writes (RTT_WR) +- based on F2x[1,0]84[DramTermDyn] */ +- ret |= ((dword >> 10) & 3) << 9; ++ /* Obtain Tcwl, adjust, and set CWL with the adjusted value */ ++ dword = Get_NB32_DCT(dev, dct, 0x20c) & 0x1f; ++ ret |= ((dword - 5) << 3); ++ ++ /* Obtain and set RttWr */ ++ ret |= (fam15_rttwr(pDCTstat, dct, dimm, rank, package_type) << 9); ++ } else { ++ ret = 0x20000; ++ ret |= (MrsChipSel << 20); ++ ++ /* program MrsAddress[5:3]=CAS write latency (CWL): ++ * based on F2x[1,0]84[Tcwl] */ ++ dword = Get_NB32_DCT(dev, dct, 0x84); ++ dword = mct_AdjustSPDTimings(pMCTstat, pDCTstat, dword); ++ ++ ret |= ((dword >> 20) & 7) << 3; ++ ++ /* program MrsAddress[6]=auto self refresh method (ASR): ++ * based on F2x[1,0]84[ASR] ++ * program MrsAddress[7]=self refresh temperature range (SRT): ++ * based on F2x[1,0]84[ASR and SRT] ++ */ ++ ret |= ((dword >> 18) & 3) << 6; ++ ++ /* program MrsAddress[10:9]=dynamic termination during writes (RTT_WR) ++ * based on F2x[1,0]84[DramTermDyn] ++ */ ++ ret |= ((dword >> 10) & 3) << 9; ++ } + + return ret; + } +@@ -113,20 +313,28 @@ static u32 mct_MR2(struct MCTStatStruc *pMCTstat, + static u32 mct_MR3(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct, u32 MrsChipSel) + { +- u32 reg_off = 0x100 * dct; + u32 dev = pDCTstat->dev_dct; + u32 dword, ret; + +- ret = 0x30000; +- ret |= MrsChipSel; ++ if (is_fam15h()) { ++ ret = 0xc0000; ++ ret |= (MrsChipSel << 21); + +- /* program MrsAddress[1:0]=multi purpose register address location +- (MPR Location):based on F2x[1,0]84[MprLoc] +- program MrsAddress[2]=multi purpose register +- (MPR):based on F2x[1,0]84[MprEn] +- */ +- dword = Get_NB32(dev, reg_off + 0x84); +- ret |= (dword >> 24) & 7; ++ /* Program MPR and MPRLoc to 0 */ ++ // ret |= 0x0; /* MPR */ ++ // ret |= (0x0 << 2); /* MPRLoc */ ++ } else { ++ ret = 0x30000; ++ ret |= (MrsChipSel << 20); ++ ++ /* program MrsAddress[1:0]=multi purpose register address location ++ * (MPR Location):based on F2x[1,0]84[MprLoc] ++ * program MrsAddress[2]=multi purpose register ++ * (MPR):based on F2x[1,0]84[MprEn] ++ */ ++ dword = Get_NB32_DCT(dev, dct, 0x84); ++ ret |= (dword >> 24) & 7; ++ } + + return ret; + } +@@ -134,48 +342,93 @@ static u32 mct_MR3(struct MCTStatStruc *pMCTstat, + static u32 mct_MR1(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct, u32 MrsChipSel) + { +- u32 reg_off = 0x100 * dct; + u32 dev = pDCTstat->dev_dct; + u32 dword, ret; + +- ret = 0x10000; +- ret |= MrsChipSel; +- +- /* program MrsAddress[5,1]=output driver impedance control (DIC): +- * based on F2x[1,0]84[DrvImpCtrl] */ +- dword = Get_NB32(dev, reg_off + 0x84); +- if (dword & (1 << 3)) +- ret |= 1 << 5; +- if (dword & (1 << 2)) +- ret |= 1 << 1; +- +- /* program MrsAddress[9,6,2]=nominal termination resistance of ODT (RTT): +- based on F2x[1,0]84[DramTerm] */ +- if (!(pDCTstat->Status & (1 << SB_Registered))) { +- if (dword & (1 << 9)) +- ret |= 1 << 9; +- if (dword & (1 << 8)) +- ret |= 1 << 6; +- if (dword & (1 << 7)) +- ret |= 1 << 2; ++ if (is_fam15h()) { ++ uint8_t package_type = mctGet_NVbits(NV_PACK_TYPE); ++ ++ /* Set defaults */ ++ uint8_t qoff = 0; /* Enable output buffers */ ++ uint8_t wrlvl = 0; /* Disable write levelling */ ++ uint8_t tqds = 0; ++ uint8_t rttnom = 0; ++ uint8_t dic = 0; ++ uint8_t additive_latency = 0; ++ uint8_t dll_enable = 0; ++ ++ ret = 0x40000; ++ ret |= (MrsChipSel << 21); ++ ++ /* The formula for chip select number is: CS 
= dimm*2+rank */ ++ uint8_t dimm = MrsChipSel / 2; ++ uint8_t rank = MrsChipSel % 2; ++ ++ /* Determine if TQDS should be set */ ++ if ((pDCTstat->Dimmx8Present & (1 << dimm)) ++ && (((dimm & 0x1)?(pDCTstat->Dimmx4Present&0x55):(pDCTstat->Dimmx4Present&0xaa)) != 0x0) ++ && (pDCTstat->Status & (1 << SB_LoadReduced))) ++ tqds = 1; ++ ++ /* Obtain RttNom */ ++ rttnom = fam15_rttnom(pDCTstat, dct, dimm, rank, package_type); ++ ++ /* Obtain DIC */ ++ dic = fam15_dimm_dic(pDCTstat, dct, dimm, rank, package_type); ++ ++ /* Load data into MRS word */ ++ ret |= (qoff & 0x1) << 12; ++ ret |= (tqds & 0x1) << 11; ++ ret |= ((rttnom & 0x4) >> 2) << 9; ++ ret |= ((rttnom & 0x2) >> 1) << 6; ++ ret |= ((rttnom & 0x1) >> 0) << 2; ++ ret |= (wrlvl & 0x1) << 7; ++ ret |= ((dic & 0x2) >> 1) << 5; ++ ret |= ((dic & 0x1) >> 0) << 1; ++ ret |= (additive_latency & 0x3) << 3; ++ ret |= (dll_enable & 0x1); + } else { +- ret |= mct_MR1Odt_RDimm(pMCTstat, pDCTstat, dct, MrsChipSel); +- } ++ ret = 0x10000; ++ ret |= (MrsChipSel << 20); ++ ++ /* program MrsAddress[5,1]=output driver impedance control (DIC): ++ * based on F2x[1,0]84[DrvImpCtrl] ++ */ ++ dword = Get_NB32_DCT(dev, dct, 0x84); ++ if (dword & (1 << 3)) ++ ret |= 1 << 5; ++ if (dword & (1 << 2)) ++ ret |= 1 << 1; ++ ++ /* program MrsAddress[9,6,2]=nominal termination resistance of ODT (RTT): ++ * based on F2x[1,0]84[DramTerm] ++ */ ++ if (!(pDCTstat->Status & (1 << SB_Registered))) { ++ if (dword & (1 << 9)) ++ ret |= 1 << 9; ++ if (dword & (1 << 8)) ++ ret |= 1 << 6; ++ if (dword & (1 << 7)) ++ ret |= 1 << 2; ++ } else { ++ ret |= mct_MR1Odt_RDimm(pMCTstat, pDCTstat, dct, MrsChipSel); ++ } + +- /* program MrsAddress[11]=TDQS: based on F2x[1,0]94[RDqsEn] */ +- if (Get_NB32(dev, reg_off + 0x94) & (1 << RDqsEn)) { +- u8 bit; +- /* Set TDQS=1b for x8 DIMM, TDQS=0b for x4 DIMM, when mixed x8 & x4 */ +- bit = (ret >> 21) << 1; +- if ((dct & 1) != 0) +- bit ++; +- if (pDCTstat->Dimmx8Present & (1 << bit)) +- ret |= 1 << 11; +- } ++ /* program MrsAddress[11]=TDQS: based on F2x[1,0]94[RDqsEn] */ ++ if (Get_NB32_DCT(dev, dct, 0x94) & (1 << RDqsEn)) { ++ u8 bit; ++ /* Set TDQS=1b for x8 DIMM, TDQS=0b for x4 DIMM, when mixed x8 & x4 */ ++ bit = (ret >> 21) << 1; ++ if ((dct & 1) != 0) ++ bit ++; ++ if (pDCTstat->Dimmx8Present & (1 << bit)) ++ ret |= 1 << 11; ++ } + +- /* program MrsAddress[12]=QOFF: based on F2x[1,0]84[Qoff] */ +- if (dword & (1 << 13)) +- ret |= 1 << 12; ++ /* program MrsAddress[12]=QOFF: based on F2x[1,0]84[Qoff] */ ++ if (dword & (1 << 13)) ++ ret |= 1 << 12; ++ } + + return ret; + } +@@ -183,60 +436,139 @@ static u32 mct_MR1(struct MCTStatStruc *pMCTstat, + static u32 mct_MR0(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct, u32 MrsChipSel) + { +- u32 reg_off = 0x100 * dct; + u32 dev = pDCTstat->dev_dct; + u32 dword, ret, dword2; + +- ret = 0x00000; +- ret |= MrsChipSel; +- +- /* program MrsAddress[1:0]=burst length and control method +- (BL):based on F2x[1,0]84[BurstCtrl] */ +- dword = Get_NB32(dev, reg_off + 0x84); +- ret |= dword & 3; +- +- /* program MrsAddress[3]=1 (BT):interleaved */ +- ret |= 1 << 3; +- +- /* program MrsAddress[6:4,2]=read CAS latency +- (CL):based on F2x[1,0]88[Tcl] */ +- dword2 = Get_NB32(dev, reg_off + 0x88); +- ret |= (dword2 & 0x7) << 4; /* F2x88[2:0] to MrsAddress[6:4] */ +- ret |= ((dword2 & 0x8) >> 3) << 2; /* F2x88[3] to MrsAddress[2] */ +- +- /* program MrsAddress[12]=0 (PPD):slow exit */ +- if (dword & (1 << 23)) +- ret |= 1 << 12; +- +- /* program MrsAddress[11:9]=write recovery for 
auto-precharge +- (WR):based on F2x[1,0]84[Twr] */ +- ret |= ((dword >> 4) & 7) << 9; +- +- /* program MrsAddress[8]=1 (DLL):DLL reset +- just issue DLL reset at first time */ +- ret |= 1 << 8; ++ if (is_fam15h()) { ++ ret = 0x00000; ++ ret |= (MrsChipSel << 21); ++ ++ /* Set defaults */ ++ uint8_t ppd = 0; ++ uint8_t wr_ap = 0; ++ uint8_t dll_reset = 1; ++ uint8_t test_mode = 0; ++ uint8_t cas_latency = 0; ++ uint8_t read_burst_type = 1; ++ uint8_t burst_length = 0; ++ ++ /* Obtain PchgPDModeSel */ ++ dword = Get_NB32_DCT(dev, dct, 0x84); ++ ppd = (dword >> 23) & 0x1; ++ ++ /* Obtain Twr */ ++ dword = Get_NB32_DCT(dev, dct, 0x22c) & 0x1f; ++ ++ /* Calculate wr_ap (Fam15h BKDG v3.14 Table 82) */ ++ if (dword == 0x10) ++ wr_ap = 0x0; ++ else if (dword == 0x5) ++ wr_ap = 0x1; ++ else if (dword == 0x6) ++ wr_ap = 0x2; ++ else if (dword == 0x7) ++ wr_ap = 0x3; ++ else if (dword == 0x8) ++ wr_ap = 0x4; ++ else if (dword == 0xa) ++ wr_ap = 0x5; ++ else if (dword == 0xc) ++ wr_ap = 0x6; ++ else if (dword == 0xe) ++ wr_ap = 0x7; ++ ++ /* Obtain Tcl */ ++ dword = Get_NB32_DCT(dev, dct, 0x200) & 0x1f; ++ ++ /* Calculate cas_latency (Fam15h BKDG v3.14 Table 83) */ ++ if (dword == 0x5) ++ cas_latency = 0x2; ++ else if (dword == 0x6) ++ cas_latency = 0x4; ++ else if (dword == 0x7) ++ cas_latency = 0x6; ++ else if (dword == 0x8) ++ cas_latency = 0x8; ++ else if (dword == 0x9) ++ cas_latency = 0xa; ++ else if (dword == 0xa) ++ cas_latency = 0xc; ++ else if (dword == 0xb) ++ cas_latency = 0xe; ++ else if (dword == 0xc) ++ cas_latency = 0x1; ++ else if (dword == 0xd) ++ cas_latency = 0x3; ++ else if (dword == 0xe) ++ cas_latency = 0x5; ++ else if (dword == 0xf) ++ cas_latency = 0x7; ++ else if (dword == 0x10) ++ cas_latency = 0x9; ++ ++ /* Obtain BurstCtrl */ ++ burst_length = Get_NB32_DCT(dev, dct, 0x84) & 0x3; ++ ++ /* Load data into MRS word */ ++ ret |= (ppd & 0x1) << 12; ++ ret |= (wr_ap & 0x3) << 9; ++ ret |= (dll_reset & 0x1) << 8; ++ ret |= (test_mode & 0x1) << 7; ++ ret |= ((cas_latency & 0xe) >> 1) << 4; ++ ret |= ((cas_latency & 0x1) >> 0) << 2; ++ ret |= (read_burst_type & 0x1) << 3; ++ ret |= (burst_length & 0x3); ++ } else { ++ ret = 0x00000; ++ ret |= (MrsChipSel << 20); ++ ++ /* program MrsAddress[1:0]=burst length and control method ++ (BL):based on F2x[1,0]84[BurstCtrl] */ ++ dword = Get_NB32_DCT(dev, dct, 0x84); ++ ret |= dword & 3; ++ ++ /* program MrsAddress[3]=1 (BT):interleaved */ ++ ret |= 1 << 3; ++ ++ /* program MrsAddress[6:4,2]=read CAS latency ++ (CL):based on F2x[1,0]88[Tcl] */ ++ dword2 = Get_NB32_DCT(dev, dct, 0x88); ++ ret |= (dword2 & 0x7) << 4; /* F2x88[2:0] to MrsAddress[6:4] */ ++ ret |= ((dword2 & 0x8) >> 3) << 2; /* F2x88[3] to MrsAddress[2] */ ++ ++ /* program MrsAddress[12]=0 (PPD):slow exit */ ++ if (dword & (1 << 23)) ++ ret |= 1 << 12; ++ ++ /* program MrsAddress[11:9]=write recovery for auto-precharge ++ (WR):based on F2x[1,0]84[Twr] */ ++ ret |= ((dword >> 4) & 7) << 9; ++ ++ /* program MrsAddress[8]=1 (DLL):DLL reset ++ just issue DLL reset at first time */ ++ ret |= 1 << 8; ++ } + + return ret; + } + + static void mct_SendZQCmd(struct DCTStatStruc *pDCTstat, u8 dct) + { +- u32 reg_off = 0x100 * dct; + u32 dev = pDCTstat->dev_dct; + u32 dword; + + /*1.Program MrsAddress[10]=1 + 2.Set SendZQCmd=1 + */ +- dword = Get_NB32(dev, reg_off + 0x7C); ++ dword = Get_NB32_DCT(dev, dct, 0x7C); + dword &= ~0xFFFFFF; + dword |= 1 << 10; + dword |= 1 << SendZQCmd; +- Set_NB32(dev, reg_off + 0x7C, dword); ++ Set_NB32_DCT(dev, dct, 0x7C, dword); + + /* Wait for SendZQCmd=0 */ 
+ do { +- dword = Get_NB32(dev, reg_off + 0x7C); ++ dword = Get_NB32_DCT(dev, dct, 0x7C); + } while (dword & (1 << SendZQCmd)); + + /* 4.Wait 512 MEMCLKs */ +@@ -248,31 +580,30 @@ void mct_DramInit_Sw_D(struct MCTStatStruc *pMCTstat, + { + u8 MrsChipSel; + u32 dword; +- u32 reg_off = 0x100 * dct; + u32 dev = pDCTstat->dev_dct; + +- if (pDCTstat->DIMMAutoSpeed == 4) { ++ if (pDCTstat->DIMMAutoSpeed == mhz_to_memclk_config(mctGet_NVbits(NV_MIN_MEMCLK))) { + /* 3.Program F2x[1,0]7C[EnDramInit]=1 */ +- dword = Get_NB32(dev, reg_off + 0x7C); ++ dword = Get_NB32_DCT(dev, dct, 0x7c); + dword |= 1 << EnDramInit; +- Set_NB32(dev, reg_off + 0x7C, dword); ++ Set_NB32_DCT(dev, dct, 0x7c, dword); + mct_DCTAccessDone(pDCTstat, dct); + + /* 4.wait 200us */ + mct_Wait(40000); + +- /* 5.On revision C processors, program F2x[1, 0]7C[DeassertMemRstX] = 1. */ +- dword = Get_NB32(dev, reg_off + 0x7C); ++ /* 5.Program F2x[1, 0]7C[DeassertMemRstX] = 1. */ ++ dword = Get_NB32_DCT(dev, dct, 0x7c); + dword |= 1 << DeassertMemRstX; +- Set_NB32(dev, reg_off + 0x7C, dword); ++ Set_NB32_DCT(dev, dct, 0x7c, dword); + + /* 6.wait 500us */ + mct_Wait(200000); + + /* 7.Program F2x[1,0]7C[AssertCke]=1 */ +- dword = Get_NB32(dev, reg_off + 0x7C); ++ dword = Get_NB32_DCT(dev, dct, 0x7c); + dword |= 1 << AssertCke; +- Set_NB32(dev, reg_off + 0x7C, dword); ++ Set_NB32_DCT(dev, dct, 0x7c, dword); + + /* 8.wait 360ns */ + mct_Wait(80); +@@ -281,6 +612,13 @@ void mct_DramInit_Sw_D(struct MCTStatStruc *pMCTstat, + * must be done for each chip select pair */ + if (pDCTstat->Status & (1 << SB_Registered)) + mct_DramControlReg_Init_D(pMCTstat, pDCTstat, dct); ++ ++ /* The following steps are performed with load reduced DIMMs only and ++ * must be done for each DIMM */ ++ // if (pDCTstat->Status & (1 << SB_LoadReduced)) ++ /* TODO ++ * Implement LRDIMM configuration ++ */ + } + + /* The following steps are performed once for unbuffered DIMMs and once for each +@@ -289,23 +627,23 @@ void mct_DramInit_Sw_D(struct MCTStatStruc *pMCTstat, + if (pDCTstat->CSPresent & (1 << MrsChipSel)) { + u32 EMRS; + /* 13.Send EMRS(2) */ +- EMRS = mct_MR2(pMCTstat, pDCTstat, dct, MrsChipSel << 20); ++ EMRS = mct_MR2(pMCTstat, pDCTstat, dct, MrsChipSel); + EMRS = swapAddrBits(pDCTstat, EMRS, MrsChipSel, dct); + mct_SendMrsCmd(pDCTstat, dct, EMRS); + /* 14.Send EMRS(3). 
Ordinarily at this time, MrsAddress[2:0]=000b */ +- EMRS= mct_MR3(pMCTstat, pDCTstat, dct, MrsChipSel << 20); ++ EMRS= mct_MR3(pMCTstat, pDCTstat, dct, MrsChipSel); + EMRS = swapAddrBits(pDCTstat, EMRS, MrsChipSel, dct); + mct_SendMrsCmd(pDCTstat, dct, EMRS); + /* 15.Send EMRS(1) */ +- EMRS= mct_MR1(pMCTstat, pDCTstat, dct, MrsChipSel << 20); ++ EMRS= mct_MR1(pMCTstat, pDCTstat, dct, MrsChipSel); + EMRS = swapAddrBits(pDCTstat, EMRS, MrsChipSel, dct); + mct_SendMrsCmd(pDCTstat, dct, EMRS); + /* 16.Send MRS with MrsAddress[8]=1(reset the DLL) */ +- EMRS= mct_MR0(pMCTstat, pDCTstat, dct, MrsChipSel << 20); ++ EMRS= mct_MR0(pMCTstat, pDCTstat, dct, MrsChipSel); + EMRS = swapAddrBits(pDCTstat, EMRS, MrsChipSel, dct); + mct_SendMrsCmd(pDCTstat, dct, EMRS); + +- if (pDCTstat->DIMMAutoSpeed == 4) ++ if (pDCTstat->DIMMAutoSpeed == mhz_to_memclk_config(mctGet_NVbits(NV_MIN_MEMCLK))) + if (!(pDCTstat->Status & (1 << SB_Registered))) + break; /* For UDIMM, only send MR commands once per channel */ + } +@@ -314,16 +652,15 @@ void mct_DramInit_Sw_D(struct MCTStatStruc *pMCTstat, + MrsChipSel ++; + } + +- mct_Wait(100000); +- +- if (pDCTstat->DIMMAutoSpeed == 4) { ++ if (pDCTstat->DIMMAutoSpeed == mhz_to_memclk_config(mctGet_NVbits(NV_MIN_MEMCLK))) { + /* 17.Send two ZQCL commands */ + mct_SendZQCmd(pDCTstat, dct); + mct_SendZQCmd(pDCTstat, dct); ++ + /* 18.Program F2x[1,0]7C[EnDramInit]=0 */ +- dword = Get_NB32(dev, reg_off + 0x7C); ++ dword = Get_NB32_DCT(dev, dct, 0x7C); + dword &= ~(1 << EnDramInit); +- Set_NB32(dev, reg_off + 0x7C, dword); ++ Set_NB32_DCT(dev, dct, 0x7C, dword); + mct_DCTAccessDone(pDCTstat, dct); + } + } +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c +index 91e8f77..011a94f 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c +@@ -23,7 +23,10 @@ + Description: Receiver En and DQS Timing Training feature for DDR 3 MCT + ******************************************************************************/ + +-static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, ++static int32_t abs(int32_t val); ++static void dqsTrainRcvrEn_SW_Fam10(struct MCTStatStruc *pMCTstat, ++ struct DCTStatStruc *pDCTstat, u8 Pass); ++static void dqsTrainRcvrEn_SW_Fam15(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 Pass); + static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat); +@@ -32,7 +35,7 @@ static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat, + static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 Channel); + static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u16 DQSRcvEnDly); +-static void fenceDynTraining_D(struct MCTStatStruc *pMCTstat, ++static uint32_t fenceDynTraining_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct); + static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat); + +@@ -89,11 +92,154 @@ static void SetupRcvrPattern(struct MCTStatStruc *pMCTstat, + void mct_TrainRcvrEn_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 Pass) + { +- if(mct_checkNumberOfDqsRcvEn_1Pass(Pass)) +- dqsTrainRcvrEn_SW(pMCTstat, pDCTstat, Pass); ++ if(mct_checkNumberOfDqsRcvEn_1Pass(Pass)) { ++ if (is_fam15h()) ++ dqsTrainRcvrEn_SW_Fam15(pMCTstat, pDCTstat, Pass); ++ else ++ dqsTrainRcvrEn_SW_Fam10(pMCTstat, pDCTstat, Pass); ++ } + } + +-static void read_dqs_write_timing_control_registers(uint16_t* 
current_total_delay, uint32_t dev, uint8_t dimm, uint32_t index_reg) ++static uint16_t fam15_receiver_enable_training_seed(struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t dimm, uint8_t rank, uint8_t package_type) ++{ ++ uint32_t dword; ++ uint16_t seed = 0; ++ ++ /* FIXME ++ * Mainboards need to be able to specify the maximum number of DIMMs installable per channel ++ * For now assume a maximum of 2 DIMMs per channel can be installed ++ */ ++ uint8_t MaxDimmsInstallable = 2; ++ ++ uint8_t channel = dct; ++ if (package_type == PT_GR) { ++ /* Get the internal node number */ ++ dword = Get_NB32(pDCTstat->dev_nbmisc, 0xe8); ++ dword = (dword >> 30) & 0x3; ++ if (dword == 1) { ++ channel += 2; ++ } ++ } ++ ++ if (pDCTstat->Status & (1 << SB_Registered)) { ++ if (package_type == PT_GR) { ++ /* Socket G34: Fam15h BKDG v3.14 Table 99 */ ++ if (MaxDimmsInstallable == 1) { ++ if (channel == 0) ++ seed = 0x43; ++ else if (channel == 1) ++ seed = 0x3f; ++ else if (channel == 2) ++ seed = 0x3a; ++ else if (channel == 3) ++ seed = 0x35; ++ } else if (MaxDimmsInstallable == 2) { ++ if (channel == 0) ++ seed = 0x54; ++ else if (channel == 1) ++ seed = 0x4d; ++ else if (channel == 2) ++ seed = 0x45; ++ else if (channel == 3) ++ seed = 0x40; ++ } else if (MaxDimmsInstallable == 3) { ++ if (channel == 0) ++ seed = 0x6b; ++ else if (channel == 1) ++ seed = 0x5e; ++ else if (channel == 2) ++ seed = 0x4b; ++ else if (channel == 3) ++ seed = 0x3d; ++ } ++ } else if (package_type == PT_C3) { ++ /* Socket C32: Fam15h BKDG v3.14 Table 100 */ ++ if ((MaxDimmsInstallable == 1) || (MaxDimmsInstallable == 2)) { ++ if (channel == 0) ++ seed = 0x3f; ++ else if (channel == 1) ++ seed = 0x3e; ++ } else if (MaxDimmsInstallable == 3) { ++ if (channel == 0) ++ seed = 0x47; ++ else if (channel == 1) ++ seed = 0x38; ++ } ++ } ++ } else if (pDCTstat->Status & (1 << SB_LoadReduced)) { ++ if (package_type == PT_GR) { ++ /* Socket G34: Fam15h BKDG v3.14 Table 99 */ ++ if (MaxDimmsInstallable == 1) { ++ if (channel == 0) ++ seed = 0x123; ++ else if (channel == 1) ++ seed = 0x122; ++ else if (channel == 2) ++ seed = 0x112; ++ else if (channel == 3) ++ seed = 0x102; ++ } ++ } else if (package_type == PT_C3) { ++ /* Socket C32: Fam15h BKDG v3.14 Table 100 */ ++ if (channel == 0) ++ seed = 0x132; ++ else if (channel == 1) ++ seed = 0x122; ++ } ++ } else { ++ if (package_type == PT_GR) { ++ /* Socket G34: Fam15h BKDG v3.14 Table 99 */ ++ if (MaxDimmsInstallable == 1) { ++ if (channel == 0) ++ seed = 0x3e; ++ else if (channel == 1) ++ seed = 0x38; ++ else if (channel == 2) ++ seed = 0x37; ++ else if (channel == 3) ++ seed = 0x31; ++ } else if (MaxDimmsInstallable == 2) { ++ if (channel == 0) ++ seed = 0x51; ++ else if (channel == 1) ++ seed = 0x4a; ++ else if (channel == 2) ++ seed = 0x46; ++ else if (channel == 3) ++ seed = 0x3f; ++ } else if (MaxDimmsInstallable == 3) { ++ if (channel == 0) ++ seed = 0x5e; ++ else if (channel == 1) ++ seed = 0x52; ++ else if (channel == 2) ++ seed = 0x48; ++ else if (channel == 3) ++ seed = 0x3c; ++ } ++ } else if (package_type == PT_C3) { ++ /* Socket C32: Fam15h BKDG v3.14 Table 100 */ ++ if ((MaxDimmsInstallable == 1) || (MaxDimmsInstallable == 2)) { ++ if (channel == 0) ++ seed = 0x39; ++ else if (channel == 1) ++ seed = 0x32; ++ } else if (MaxDimmsInstallable == 3) { ++ if (channel == 0) ++ seed = 0x45; ++ else if (channel == 1) ++ seed = 0x37; ++ } ++ } else if (package_type == PT_M2) { ++ /* Socket AM3: Fam15h BKDG v3.14 Table 101 */ ++ seed = 0x3a; ++ } ++ } ++ ++ return seed; ++} ++ 
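A minimal standalone sketch (assuming the unbuffered Socket G34 seed values visible in the selection logic above; the helper name, array layout, and main() driver are illustrative only and are not part of coreboot or of this patch) showing how the per-channel receiver-enable training seed lookup could equivalently be expressed as a data table instead of an if/else chain:

/* Sketch: table-driven form of the unbuffered Socket G34 seed lookup.
 * Values mirror the Fam15h BKDG v3.14 Table 99 entries used above;
 * the function name and array layout are assumptions for illustration.
 */
#include <stdint.h>
#include <stdio.h>

/* Indexed as seed[MaxDimmsInstallable - 1][channel] */
static const uint16_t g34_udimm_seed[3][4] = {
	{ 0x3e, 0x38, 0x37, 0x31 },	/* 1 DIMM per channel  */
	{ 0x51, 0x4a, 0x46, 0x3f },	/* 2 DIMMs per channel */
	{ 0x5e, 0x52, 0x48, 0x3c },	/* 3 DIMMs per channel */
};

static uint16_t g34_udimm_training_seed(uint8_t max_dimms_installable, uint8_t channel)
{
	if (max_dimms_installable < 1 || max_dimms_installable > 3 || channel > 3)
		return 0;	/* out of range; mirrors the seed = 0 default above */
	return g34_udimm_seed[max_dimms_installable - 1][channel];
}

int main(void)
{
	/* Channel 2 on a board wired for 2 DIMMs per channel -> 0x46 */
	printf("seed = 0x%03x\n", g34_udimm_training_seed(2, 2));
	return 0;
}

Note that the patch also remaps the channel index for the second internal node on Socket G34 (channel += 2 when bits 31:30 of the northbridge 0xE8 register read 1), which any table-driven variant would have to preserve.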
++static void read_dqs_write_timing_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg) + { + uint8_t lane; + uint32_t dword; +@@ -111,7 +257,7 @@ static void read_dqs_write_timing_control_registers(uint16_t* current_total_dela + if (lane == 8) + wdt_reg = 0x32; + wdt_reg += dimm * 3; +- dword = Get_NB32_index_wait(dev, index_reg, wdt_reg); ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, wdt_reg); + if ((lane == 7) || (lane == 5) || (lane == 3) || (lane == 1)) + current_total_delay[lane] = (dword & 0x00ff0000) >> 16; + if ((lane == 8) || (lane == 6) || (lane == 4) || (lane == 2) || (lane == 0)) +@@ -119,12 +265,124 @@ static void read_dqs_write_timing_control_registers(uint16_t* current_total_dela + } + } + +-static void write_dqs_receiver_enable_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dimm, uint32_t index_reg) ++#ifdef UNUSED_CODE ++static void write_dqs_write_timing_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg) ++{ ++ uint8_t lane; ++ uint32_t dword; ++ ++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) { ++ uint32_t ret_reg; ++ if ((lane == 0) || (lane == 1)) ++ ret_reg = 0x30; ++ if ((lane == 2) || (lane == 3)) ++ ret_reg = 0x31; ++ if ((lane == 4) || (lane == 5)) ++ ret_reg = 0x40; ++ if ((lane == 6) || (lane == 7)) ++ ret_reg = 0x41; ++ if (lane == 8) ++ ret_reg = 0x32; ++ ret_reg += dimm * 3; ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, ret_reg); ++ if ((lane == 7) || (lane == 5) || (lane == 3) || (lane == 1)) { ++ dword &= ~(0xff << 16); ++ dword |= (current_total_delay[lane] & 0xff) << 16; ++ } ++ if ((lane == 8) || (lane == 6) || (lane == 4) || (lane == 2) || (lane == 0)) { ++ dword &= ~0xff; ++ dword |= current_total_delay[lane] & 0xff; ++ } ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, ret_reg, dword); ++ } ++} ++#endif ++ ++static void write_write_data_timing_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg) ++{ ++ uint8_t lane; ++ uint32_t dword; ++ ++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) { ++ uint32_t wdt_reg; ++ ++ /* Calculate Write Data Timing register location */ ++ if ((lane == 0) || (lane == 1) || (lane == 2) || (lane == 3)) ++ wdt_reg = 0x1; ++ if ((lane == 4) || (lane == 5) || (lane == 6) || (lane == 7)) ++ wdt_reg = 0x2; ++ if (lane == 8) ++ wdt_reg = 0x3; ++ wdt_reg |= (dimm << 8); ++ ++ /* Set Write Data Timing register values */ ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, wdt_reg); ++ if ((lane == 7) || (lane == 3)) { ++ dword &= ~(0x7f << 24); ++ dword |= (current_total_delay[lane] & 0x7f) << 24; ++ } ++ if ((lane == 6) || (lane == 2)) { ++ dword &= ~(0x7f << 16); ++ dword |= (current_total_delay[lane] & 0x7f) << 16; ++ } ++ if ((lane == 5) || (lane == 1)) { ++ dword &= ~(0x7f << 8); ++ dword |= (current_total_delay[lane] & 0x7f) << 8; ++ } ++ if ((lane == 8) || (lane == 4) || (lane == 0)) { ++ dword &= ~0x7f; ++ dword |= current_total_delay[lane] & 0x7f; ++ } ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, wdt_reg, dword); ++ } ++} ++ ++static void read_dqs_receiver_enable_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg) ++{ ++ uint8_t lane; ++ uint32_t mask; ++ uint32_t dword; ++ ++ if (is_fam15h()) ++ mask = 0x3ff; ++ else ++ mask = 0x1ff; ++ ++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) { ++ uint32_t ret_reg; ++ if ((lane == 0) || (lane == 1)) ++ 
ret_reg = 0x10; ++ if ((lane == 2) || (lane == 3)) ++ ret_reg = 0x11; ++ if ((lane == 4) || (lane == 5)) ++ ret_reg = 0x20; ++ if ((lane == 6) || (lane == 7)) ++ ret_reg = 0x21; ++ if (lane == 8) ++ ret_reg = 0x12; ++ ret_reg += dimm * 3; ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, ret_reg); ++ if ((lane == 7) || (lane == 5) || (lane == 3) || (lane == 1)) { ++ current_total_delay[lane] = (dword & (mask << 16)) >> 16; ++ } ++ if ((lane == 8) || (lane == 6) || (lane == 4) || (lane == 2) || (lane == 0)) { ++ current_total_delay[lane] = dword & mask; ++ } ++ } ++} ++ ++static void write_dqs_receiver_enable_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg) + { + uint8_t lane; ++ uint32_t mask; + uint32_t dword; + +- for (lane = 0; lane < 8; lane++) { ++ if (is_fam15h()) ++ mask = 0x3ff; ++ else ++ mask = 0x1ff; ++ ++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) { + uint32_t ret_reg; + if ((lane == 0) || (lane == 1)) + ret_reg = 0x10; +@@ -134,17 +392,125 @@ static void write_dqs_receiver_enable_control_registers(uint16_t* current_total_ + ret_reg = 0x20; + if ((lane == 6) || (lane == 7)) + ret_reg = 0x21; ++ if (lane == 8) ++ ret_reg = 0x12; + ret_reg += dimm * 3; +- dword = Get_NB32_index_wait(dev, index_reg, ret_reg); ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, ret_reg); + if ((lane == 7) || (lane == 5) || (lane == 3) || (lane == 1)) { +- dword &= ~(0x1ff << 16); +- dword |= (current_total_delay[lane] & 0x1ff) << 16; ++ dword &= ~(mask << 16); ++ dword |= (current_total_delay[lane] & mask) << 16; + } +- if ((lane == 6) || (lane == 4) || (lane == 2) || (lane == 0)) { +- dword &= ~0x1ff; +- dword |= current_total_delay[lane] & 0x1ff; ++ if ((lane == 8) || (lane == 6) || (lane == 4) || (lane == 2) || (lane == 0)) { ++ dword &= ~mask; ++ dword |= current_total_delay[lane] & mask; + } +- Set_NB32_index_wait(dev, index_reg, ret_reg, dword); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, ret_reg, dword); ++ } ++} ++ ++static void read_dram_phase_recovery_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg) ++{ ++ uint8_t lane; ++ uint32_t dword; ++ ++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) { ++ uint32_t prc_reg; ++ ++ /* Calculate DRAM Phase Recovery Control register location */ ++ if ((lane == 0) || (lane == 1) || (lane == 2) || (lane == 3)) ++ prc_reg = 0x50; ++ if ((lane == 4) || (lane == 5) || (lane == 6) || (lane == 7)) ++ prc_reg = 0x51; ++ if (lane == 8) ++ prc_reg = 0x52; ++ ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, prc_reg); ++ if ((lane == 7) || (lane == 3)) { ++ current_total_delay[lane] = (dword >> 24) & 0x7f; ++ } ++ if ((lane == 6) || (lane == 2)) { ++ current_total_delay[lane] = (dword >> 16) & 0x7f; ++ } ++ if ((lane == 5) || (lane == 1)) { ++ current_total_delay[lane] = (dword >> 8) & 0x7f; ++ } ++ if ((lane == 8) || (lane == 4) || (lane == 0)) { ++ current_total_delay[lane] = dword & 0x7f; ++ } ++ } ++} ++ ++static void write_dram_phase_recovery_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg) ++{ ++ uint8_t lane; ++ uint32_t dword; ++ ++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) { ++ uint32_t prc_reg; ++ ++ /* Calculate DRAM Phase Recovery Control register location */ ++ if ((lane == 0) || (lane == 1) || (lane == 2) || (lane == 3)) ++ prc_reg = 0x50; ++ if ((lane == 4) || (lane == 5) || (lane == 6) || (lane == 7)) ++ prc_reg = 0x51; ++ if (lane == 8) ++ 
prc_reg = 0x52; ++ ++ /* Set DRAM Phase Recovery Control register values */ ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, prc_reg); ++ if ((lane == 7) || (lane == 3)) { ++ dword &= ~(0x7f << 24); ++ dword |= (current_total_delay[lane] & 0x7f) << 24; ++ } ++ if ((lane == 6) || (lane == 2)) { ++ dword &= ~(0x7f << 16); ++ dword |= (current_total_delay[lane] & 0x7f) << 16; ++ } ++ if ((lane == 5) || (lane == 1)) { ++ dword &= ~(0x7f << 8); ++ dword |= (current_total_delay[lane] & 0x7f) << 8; ++ } ++ if ((lane == 8) || (lane == 4) || (lane == 0)) { ++ dword &= ~0x7f; ++ dword |= current_total_delay[lane] & 0x7f; ++ } ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, prc_reg, dword); ++ } ++} ++ ++static void read_read_dqs_timing_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg) ++{ ++ uint8_t lane; ++ uint32_t dword; ++ ++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) { ++ uint32_t rdt_reg; ++ ++ /* Calculate DRAM Read DQS Timing register location */ ++ if ((lane == 0) || (lane == 1) || (lane == 2) || (lane == 3)) ++ rdt_reg = 0x5; ++ if ((lane == 4) || (lane == 5) || (lane == 6) || (lane == 7)) ++ rdt_reg = 0x6; ++ if (lane == 8) ++ rdt_reg = 0x7; ++ rdt_reg |= (dimm << 8); ++ ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, rdt_reg); ++ if ((lane == 7) || (lane == 3)) { ++ current_total_delay[lane] = (dword >> 24) & 0x3f; ++ } ++ if ((lane == 6) || (lane == 2)) { ++ current_total_delay[lane] = (dword >> 16) & 0x3f; ++ } ++ if ((lane == 5) || (lane == 1)) { ++ current_total_delay[lane] = (dword >> 8) & 0x3f; ++ } ++ if ((lane == 8) || (lane == 4) || (lane == 0)) { ++ current_total_delay[lane] = dword & 0x3f; ++ } ++ ++ if (is_fam15h()) ++ current_total_delay[lane] >>= 1; + } + } + +@@ -160,10 +526,11 @@ static uint32_t convert_testaddr_and_channel_to_address(struct DCTStatStruc *pDC + return testaddr; + } + +-/* DQS Receiver Enable Training +- * Algorithm detailed in the Fam10h BKDG Rev. 3.62 section 2.8.9.9.2 ++/* DQS Receiver Enable Training (Family 10h) ++ * Algorithm detailed in: ++ * The Fam10h BKDG Rev. 
3.62 section 2.8.9.9.2 + */ +-static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, ++static void dqsTrainRcvrEn_SW_Fam10(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 Pass) + { + u8 Channel; +@@ -171,7 +538,6 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, + u8 Addl_Index = 0; + u8 Receiver; + u8 _DisableDramECC = 0, _Wrap32Dis = 0, _SSE2 = 0; +- u8 Final_Value; + u16 CTLRMaxDelay; + u16 MaxDelay_CH[2]; + u32 TestAddr0, TestAddr1, TestAddr0B, TestAddr1B; +@@ -188,6 +554,7 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, + u32 lo, hi; + + uint32_t dword; ++ uint8_t dimm; + uint8_t rank; + uint8_t lane; + uint16_t current_total_delay[MAX_BYTE_LANES]; +@@ -214,14 +581,13 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, + } + + for (ch = ch_start; ch < ch_end; ch++) { +- reg = 0x78 + (0x100 * ch); +- val = Get_NB32(dev, reg); ++ reg = 0x78; ++ val = Get_NB32_DCT(dev, ch, reg); + val &= ~(0x3ff << 22); +- val |= (0x0c8 << 22); /* Max Rd Lat */ +- Set_NB32(dev, reg, val); ++ val |= (0x0c8 << 22); /* MaxRdLatency = 0xc8 */ ++ Set_NB32_DCT(dev, ch, reg, val); + } + +- Final_Value = 1; + if (Pass == FirstPass) { + mct_InitDQSPos4RcvrEn_D(pMCTstat, pDCTstat); + } else { +@@ -260,7 +626,7 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, + + CTLRMaxDelay = 0; + MaxDelay_CH[Channel] = 0; +- index_reg = 0x98 + 0x100 * Channel; ++ index_reg = 0x98; + + Receiver = mct_InitReceiver_D(pDCTstat, Channel); + /* There are four receiver pairs, loosely associated with chipselects. +@@ -268,6 +634,7 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, + */ + for (; Receiver < 8; Receiver += 2) { + Addl_Index = (Receiver >> 1) * 3 + 0x10; ++ dimm = (Receiver >> 1); + + print_debug_dqs("\t\tTrainRcvEnd52: index ", Addl_Index, 2); + +@@ -284,45 +651,14 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, + /* 2.8.9.9.2 (1, 6) + * Retrieve gross and fine timing fields from write DQS registers + */ +- read_dqs_write_timing_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg); ++ read_dqs_write_timing_control_registers(current_total_delay, dev, Channel, dimm, index_reg); + + /* 2.8.9.9.2 (1) + * Program the Write Data Timing and Write ECC Timing register to + * the values stored in the DQS Write Timing Control register + * for each lane + */ +- for (lane = 0; lane < MAX_BYTE_LANES; lane++) { +- uint32_t wdt_reg; +- +- /* Calculate Write Data Timing register location */ +- if ((lane == 0) || (lane == 1) || (lane == 2) || (lane == 3)) +- wdt_reg = 0x1; +- if ((lane == 4) || (lane == 5) || (lane == 6) || (lane == 7)) +- wdt_reg = 0x2; +- if (lane == 8) +- wdt_reg = 0x3; +- wdt_reg |= ((Receiver / 2) << 8); +- +- /* Set Write Data Timing register values */ +- dword = Get_NB32_index_wait(dev, index_reg, wdt_reg); +- if ((lane == 7) || (lane == 3)) { +- dword &= ~(0x7f << 24); +- dword |= (current_total_delay[lane] & 0x7f) << 24; +- } +- if ((lane == 6) || (lane == 2)) { +- dword &= ~(0x7f << 16); +- dword |= (current_total_delay[lane] & 0x7f) << 16; +- } +- if ((lane == 5) || (lane == 1)) { +- dword &= ~(0x7f << 8); +- dword |= (current_total_delay[lane] & 0x7f) << 8; +- } +- if ((lane == 8) || (lane == 4) || (lane == 0)) { +- dword &= ~0x7f; +- dword |= current_total_delay[lane] & 0x7f; +- } +- Set_NB32_index_wait(dev, index_reg, wdt_reg, dword); +- } ++ write_write_data_timing_control_registers(current_total_delay, dev, Channel, dimm, index_reg); + + /* 2.8.9.9.2 (2) + * Program the Read DQS 
Timing Control and the Read DQS ECC Timing Control registers +@@ -336,12 +672,12 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, + rdt_reg = 0x6; + if (lane == 8) + rdt_reg = 0x7; +- rdt_reg |= ((Receiver / 2) << 8); ++ rdt_reg |= (dimm << 8); + if (lane == 8) + dword = 0x0000003f; + else + dword = 0x3f3f3f3f; +- Set_NB32_index_wait(dev, index_reg, rdt_reg, dword); ++ Set_NB32_index_wait_DCT(dev, Channel, index_reg, rdt_reg, dword); + } + + /* 2.8.9.9.2 (3) +@@ -371,7 +707,7 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, + print_debug_dqs("\t\tTrainRcvEn53: TestAddr1B ", TestAddr1B, 2); + + /* 2.8.9.9.2 (4, 5) +- * Write 1 cache line of the appropriate test pattern to each test addresse ++ * Write 1 cache line of the appropriate test pattern to each test address + */ + mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0, 0); /* rank 0 of DIMM, testpattern 0 */ + mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0B, 1); /* rank 0 of DIMM, testpattern 1 */ +@@ -390,7 +726,7 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, + /* 2.8.9.9.2 (6) + * Write gross and fine timing fields to read DQS registers + */ +- write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg); ++ write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg); + + /* 2.8.9.9.2 (7) + * Loop over all delay values up to 1 MEMCLK (0x40 delay steps) from the initial delay values +@@ -417,8 +753,8 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, + break; + + /* 2.8.9.9.2 (7 A) +- * Loop over all ranks +- */ ++ * Loop over all ranks ++ */ + for (rank = 0; rank < (_2Ranks + 1); rank++) { + /* 2.8.9.9.2 (7 A a-d) + * Read the first test address of the current rank +@@ -434,17 +770,17 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, + */ + proc_IOCLFLUSH_D((rank == 0)?TestAddr0B:TestAddr1B); + result_qword2 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, (rank == 0)?TestAddr0B:TestAddr1B, Channel)); +- write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg); ++ write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg); + proc_IOCLFLUSH_D((rank == 0)?TestAddr0:TestAddr1); + result_qword1 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, (rank == 0)?TestAddr0:TestAddr1, Channel)); +- write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg); ++ write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg); + } else { + proc_IOCLFLUSH_D((rank == 0)?TestAddr0:TestAddr1); + result_qword1 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, (rank == 0)?TestAddr0:TestAddr1, Channel)); +- write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg); ++ write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg); + proc_IOCLFLUSH_D((rank == 0)?TestAddr0B:TestAddr1B); + result_qword2 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, (rank == 0)?TestAddr0B:TestAddr1B, Channel)); +- write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg); ++ write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg); + } + /* 2.8.9.9.2 (7 A e) + * Compare both read patterns and flag passing ranks/lanes +@@ -533,7 +869,7 @@ static void 
dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, + } + + /* Update delays in hardware */ +- write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg); ++ write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg); + + /* Save previous results for comparison in the next iteration */ + for (lane = 0; lane < 8; lane++) +@@ -587,7 +923,483 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, + mct_SetMaxLatency_D(pDCTstat, Channel, CTLRMaxDelay); /* program Ch A/B MaxAsyncLat to correspond with max delay */ + } + +- ResetDCTWrPtr_D(dev, index_reg, Addl_Index); ++ for (Channel = 0; Channel < 2; Channel++) { ++ ResetDCTWrPtr_D(dev, Channel, index_reg, Addl_Index); ++ } ++ ++ if(_DisableDramECC) { ++ mct_EnableDimmEccEn_D(pMCTstat, pDCTstat, _DisableDramECC); ++ } ++ ++ if (Pass == FirstPass) { ++ /*Disable DQSRcvrEn training mode */ ++ mct_DisableDQSRcvEn_D(pDCTstat); ++ } ++ ++ if(!_Wrap32Dis) { ++ msr = HWCR; ++ _RDMSR(msr, &lo, &hi); ++ lo &= ~(1<<17); /* restore HWCR.wrap32dis */ ++ _WRMSR(msr, lo, hi); ++ } ++ if(!_SSE2){ ++ cr4 = read_cr4(); ++ cr4 &= ~(1<<9); /* restore cr4.OSFXSR */ ++ write_cr4(cr4); ++ } ++ ++#if DQS_TRAIN_DEBUG > 0 ++ { ++ u8 ChannelDTD; ++ printk(BIOS_DEBUG, "TrainRcvrEn: CH_MaxRdLat:\n"); ++ for(ChannelDTD = 0; ChannelDTD<2; ChannelDTD++) { ++ printk(BIOS_DEBUG, "Channel:%x: %x\n", ++ ChannelDTD, pDCTstat->CH_MaxRdLat[ChannelDTD]); ++ } ++ } ++#endif ++ ++#if DQS_TRAIN_DEBUG > 0 ++ { ++ u16 valDTD; ++ u8 ChannelDTD, ReceiverDTD; ++ u8 i; ++ u16 *p; ++ ++ printk(BIOS_DEBUG, "TrainRcvrEn: CH_D_B_RCVRDLY:\n"); ++ for(ChannelDTD = 0; ChannelDTD < 2; ChannelDTD++) { ++ printk(BIOS_DEBUG, "Channel:%x\n", ChannelDTD); ++ for(ReceiverDTD = 0; ReceiverDTD<8; ReceiverDTD+=2) { ++ printk(BIOS_DEBUG, "\t\tReceiver:%x:", ReceiverDTD); ++ p = pDCTstat->CH_D_B_RCVRDLY[ChannelDTD][ReceiverDTD>>1]; ++ for (i=0;i<8; i++) { ++ valDTD = p[i]; ++ printk(BIOS_DEBUG, " %03x", valDTD); ++ } ++ printk(BIOS_DEBUG, "\n"); ++ } ++ } ++ } ++#endif ++ ++ printk(BIOS_DEBUG, "TrainRcvrEn: Status %x\n", pDCTstat->Status); ++ printk(BIOS_DEBUG, "TrainRcvrEn: ErrStatus %x\n", pDCTstat->ErrStatus); ++ printk(BIOS_DEBUG, "TrainRcvrEn: ErrCode %x\n", pDCTstat->ErrCode); ++ printk(BIOS_DEBUG, "TrainRcvrEn: Done\n\n"); ++} ++ ++/* DQS Receiver Enable Training Pattern Generation (Family 15h) ++ * Algorithm detailed in: ++ * The Fam15h BKDG Rev. 3.14 section 2.10.5.8.2 (4) ++ */ ++static void generate_dram_receiver_enable_training_pattern_fam15(struct MCTStatStruc *pMCTstat, ++ struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t Receiver) ++{ ++ uint32_t dword; ++ uint32_t dev = pDCTstat->dev_dct; ++ ++ /* 2.10.5.7.1.1 ++ * It appears that the DCT only supports 8-beat burst length mode, ++ * so do nothing here... 
++ */ ++ ++ /* Wait for CmdSendInProg == 0 */ ++ do { ++ dword = Get_NB32_DCT(dev, dct, 0x250); ++ } while (dword & (0x1 << 12)); ++ ++ /* Set CmdTestEnable = 1 */ ++ dword = Get_NB32_DCT(dev, dct, 0x250); ++ dword |= (0x1 << 2); ++ Set_NB32_DCT(dev, dct, 0x250, dword); ++ ++ /* 2.10.5.8.6.1.1 Send Activate Command */ ++ dword = Get_NB32_DCT(dev, dct, 0x28c); ++ dword &= ~(0xff << 22); /* CmdChipSelect = Receiver */ ++ dword |= ((0x1 << Receiver) << 22); ++ dword &= ~(0x7 << 19); /* CmdBank = 0 */ ++ dword &= ~(0x3ffff); /* CmdAddress = 0 */ ++ dword |= (0x1 << 31); /* SendActCmd = 1 */ ++ Set_NB32_DCT(dev, dct, 0x28c, dword); ++ ++ /* Wait for SendActCmd == 0 */ ++ do { ++ dword = Get_NB32_DCT(dev, dct, 0x28c); ++ } while (dword & (0x1 << 31)); ++ ++ /* Wait 75 MEMCLKs. */ ++ precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 75); ++ ++ /* 2.10.5.8.6.1.2 */ ++ Set_NB32_DCT(dev, dct, 0x274, 0x0); /* DQMask = 0 */ ++ Set_NB32_DCT(dev, dct, 0x278, 0x0); ++ ++ dword = Get_NB32_DCT(dev, dct, 0x27c); ++ dword &= ~(0xff); /* EccMask = 0 */ ++ if (pDCTstat->DimmECCPresent == 0) ++ dword |= 0xff; /* EccMask = 0xff */ ++ Set_NB32_DCT(dev, dct, 0x27c, dword); ++ ++ /* 2.10.5.8.6.1.2 */ ++ dword = Get_NB32_DCT(dev, dct, 0x270); ++ dword &= ~(0x7ffff); /* DataPrbsSeed = 55555 */ ++// dword |= (0x55555); ++ dword |= (0x44443); /* Use AGESA seed */ ++ Set_NB32_DCT(dev, dct, 0x270, dword); ++ ++ /* 2.10.5.8.2 (4) */ ++ dword = Get_NB32_DCT(dev, dct, 0x260); ++ dword &= ~(0x1fffff); /* CmdCount = 192 */ ++ dword |= 192; ++ Set_NB32_DCT(dev, dct, 0x260, dword); ++ ++#if 0 ++ /* TODO: This applies to Fam15h model 10h and above only */ ++ /* Program Bubble Count and CmdStreamLen */ ++ dword = Get_NB32_DCT(dev, dct, 0x25c); ++ dword &= ~(0x3ff << 12); /* BubbleCnt = 0 */ ++ dword &= ~(0x3ff << 22); /* BubbleCnt2 = 0 */ ++ dword &= ~(0xff); /* CmdStreamLen = 1 */ ++ dword |= 0x1; ++ Set_NB32_DCT(dev, dct, 0x25c, dword); ++#endif ++ ++ /* Configure Target A */ ++ dword = Get_NB32_DCT(dev, dct, 0x254); ++ dword &= ~(0x7 << 24); /* TgtChipSelect = Receiver */ ++ dword |= (Receiver & 0x7) << 24; ++ dword &= ~(0x7 << 21); /* TgtBank = 0 */ ++ dword &= ~(0x3ff); /* TgtAddress = 0 */ ++ Set_NB32_DCT(dev, dct, 0x254, dword); ++ ++ dword = Get_NB32_DCT(dev, dct, 0x250); ++ dword |= (0x1 << 3); /* ResetAllErr = 1 */ ++ dword &= ~(0x1 << 4); /* StopOnErr = 0 */ ++ dword &= ~(0x3 << 8); /* CmdTgt = 0 (Target A) */ ++ dword &= ~(0x7 << 5); /* CmdType = 0 (Read) */ ++ dword |= (0x1 << 11); /* SendCmd = 1 */ ++ Set_NB32_DCT(dev, dct, 0x250, dword); ++ ++ /* 2.10.5.8.6.1.2 Wait for TestStatus == 1 and CmdSendInProg == 0 */ ++ do { ++ dword = Get_NB32_DCT(dev, dct, 0x250); ++ } while ((dword & (0x1 << 12)) || (!(dword & (0x1 << 10)))); ++ ++ dword = Get_NB32_DCT(dev, dct, 0x250); ++ dword &= ~(0x1 << 11); /* SendCmd = 0 */ ++ Set_NB32_DCT(dev, dct, 0x250, dword); ++ ++ /* 2.10.5.8.6.1.1 Send Precharge Command */ ++ /* Wait 25 MEMCLKs. */ ++ precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 25); ++ ++ dword = Get_NB32_DCT(dev, dct, 0x28c); ++ dword &= ~(0xff << 22); /* CmdChipSelect = Receiver */ ++ dword |= ((0x1 << Receiver) << 22); ++ dword &= ~(0x7 << 19); /* CmdBank = 0 */ ++ dword &= ~(0x3ffff); /* CmdAddress = 0x400 */ ++ dword |= 0x400; ++ dword |= (0x1 << 30); /* SendPchgCmd = 1 */ ++ Set_NB32_DCT(dev, dct, 0x28c, dword); ++ ++ /* Wait for SendPchgCmd == 0 */ ++ do { ++ dword = Get_NB32_DCT(dev, dct, 0x28c); ++ } while (dword & (0x1 << 30)); ++ ++ /* Wait 25 MEMCLKs. 
*/ ++ precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 25); ++ ++ /* Set CmdTestEnable = 0 */ ++ dword = Get_NB32_DCT(dev, dct, 0x250); ++ dword &= ~(0x1 << 2); ++ Set_NB32_DCT(dev, dct, 0x250, dword); ++} ++ ++/* DQS Receiver Enable Training (Family 15h) ++ * Algorithm detailed in: ++ * The Fam15h BKDG Rev. 3.14 section 2.10.5.8.2 ++ * This algorithm runs once at the lowest supported MEMCLK, ++ * then once again at the highest supported MEMCLK. ++ */ ++static void dqsTrainRcvrEn_SW_Fam15(struct MCTStatStruc *pMCTstat, ++ struct DCTStatStruc *pDCTstat, u8 Pass) ++{ ++ u8 Channel; ++ u8 _2Ranks; ++ u8 Addl_Index = 0; ++ u8 Receiver; ++ u8 _DisableDramECC = 0, _Wrap32Dis = 0, _SSE2 = 0; ++ u32 Errors; ++ ++ u32 val; ++ u32 dev; ++ u32 index_reg; ++ u32 ch_start, ch_end, ch; ++ u32 msr; ++ u32 cr4; ++ u32 lo, hi; ++ ++ uint32_t dword; ++ uint8_t dimm; ++ uint8_t rank; ++ uint8_t lane; ++ uint8_t mem_clk; ++ uint16_t initial_seed; ++ uint16_t current_total_delay[MAX_BYTE_LANES]; ++ uint16_t dqs_ret_pass1_total_delay[MAX_BYTE_LANES]; ++ uint16_t rank0_current_total_delay[MAX_BYTE_LANES]; ++ uint16_t phase_recovery_delays[MAX_BYTE_LANES]; ++ uint16_t seed[MAX_BYTE_LANES]; ++ uint16_t seed_gross[MAX_BYTE_LANES]; ++ uint16_t seed_fine[MAX_BYTE_LANES]; ++ uint16_t seed_pre_gross[MAX_BYTE_LANES]; ++ ++ uint8_t package_type = mctGet_NVbits(NV_PACK_TYPE); ++ uint16_t fam15h_freq_tab[] = {0, 0, 0, 0, 333, 0, 400, 0, 0, 0, 533, 0, 0, 0, 667, 0, 0, 0, 800, 0, 0, 0, 933}; ++ ++ print_debug_dqs("\nTrainRcvEn: Node", pDCTstat->Node_ID, 0); ++ print_debug_dqs("TrainRcvEn: Pass", Pass, 0); ++ ++ dev = pDCTstat->dev_dct; ++ index_reg = 0x98; ++ ch_start = 0; ++ ch_end = 2; ++ ++ for (ch = ch_start; ch < ch_end; ch++) { ++ uint8_t max_rd_latency = 0x55; ++ uint8_t p_state; ++ ++ /* 2.10.5.6 */ ++ fam15EnableTrainingMode(pMCTstat, pDCTstat, ch, 1); ++ ++ /* 2.10.5.2 */ ++ for (p_state = 0; p_state < 3; p_state++) { ++ val = Get_NB32_DCT_NBPstate(dev, ch, p_state, 0x210); ++ val &= ~(0x3ff << 22); /* MaxRdLatency = max_rd_latency */ ++ val |= (max_rd_latency & 0x3ff) << 22; ++ Set_NB32_DCT_NBPstate(dev, ch, p_state, 0x210, val); ++ } ++ } ++ ++ if (Pass != FirstPass) { ++ pDCTstat->DimmTrainFail = 0; ++ pDCTstat->CSTrainFail = ~pDCTstat->CSPresent; ++ } ++ ++ cr4 = read_cr4(); ++ if(cr4 & ( 1 << 9)) { /* save the old value */ ++ _SSE2 = 1; ++ } ++ cr4 |= (1 << 9); /* OSFXSR enable SSE2 */ ++ write_cr4(cr4); ++ ++ msr = HWCR; ++ _RDMSR(msr, &lo, &hi); ++ /* FIXME: Why use SSEDIS */ ++ if(lo & (1 << 17)) { /* save the old value */ ++ _Wrap32Dis = 1; ++ } ++ lo |= (1 << 17); /* HWCR.wrap32dis */ ++ lo &= ~(1 << 15); /* SSEDIS */ ++ _WRMSR(msr, lo, hi); /* Setting wrap32dis allows 64-bit memory references in real mode */ ++ ++ _DisableDramECC = mct_DisableDimmEccEn_D(pMCTstat, pDCTstat); ++ ++ Errors = 0; ++ dev = pDCTstat->dev_dct; ++ ++ for (Channel = 0; Channel < 2; Channel++) { ++ print_debug_dqs("\tTrainRcvEn51: Node ", pDCTstat->Node_ID, 1); ++ print_debug_dqs("\tTrainRcvEn51: Channel ", Channel, 1); ++ pDCTstat->Channel = Channel; ++ ++ mem_clk = Get_NB32_DCT(dev, Channel, 0x94) & 0x1f; ++ ++ Receiver = mct_InitReceiver_D(pDCTstat, Channel); ++ /* There are four receiver pairs, loosely associated with chipselects. ++ * This is essentially looping over each DIMM. 
++ */ ++ for (; Receiver < 8; Receiver += 2) { ++ Addl_Index = (Receiver >> 1) * 3 + 0x10; ++ dimm = (Receiver >> 1); ++ ++ print_debug_dqs("\t\tTrainRcvEnd52: index ", Addl_Index, 2); ++ ++ if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver)) { ++ continue; ++ } ++ ++ /* Retrieve the total delay values from pass 1 of DQS receiver enable training */ ++ if (Pass != FirstPass) { ++ read_dqs_receiver_enable_control_registers(dqs_ret_pass1_total_delay, dev, Channel, dimm, index_reg); ++ } ++ ++ /* 2.10.5.8.2 ++ * Loop over all ranks ++ */ ++ if (mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver+1)) ++ _2Ranks = 1; ++ else ++ _2Ranks = 0; ++ for (rank = 0; rank < (_2Ranks + 1); rank++) { ++ /* 2.10.5.8.2 (1) ++ * Specify the target DIMM to be trained ++ * Set TrNibbleSel = 0 ++ * ++ * TODO: Add support for x4 DIMMs ++ */ ++ dword = Get_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008); ++ dword &= ~(0x3 << 4); /* TrDimmSel */ ++ dword |= ((dimm & 0x3) << 4); ++ dword &= ~(0x1 << 2); /* TrNibbleSel */ ++ Set_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008, dword); ++ ++ /* 2.10.5.8.2 (2) ++ * Retrieve gross and fine timing fields from write DQS registers ++ */ ++ read_dqs_write_timing_control_registers(current_total_delay, dev, Channel, dimm, index_reg); ++ ++ /* 2.10.5.8.2.1 ++ * Generate the DQS Receiver Enable Training Seed Values ++ */ ++ if (Pass == FirstPass) { ++ initial_seed = fam15_receiver_enable_training_seed(pDCTstat, Channel, dimm, rank, package_type); ++ ++ /* Adjust seed for the minimum platform supported frequency */ ++ initial_seed = (uint16_t) (((((uint64_t) initial_seed) * ++ fam15h_freq_tab[mem_clk] * 100) / (mctGet_NVbits(NV_MIN_MEMCLK) * 100))); ++ ++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) { ++ uint16_t wl_pass1_delay; ++ wl_pass1_delay = current_total_delay[lane]; ++ ++ seed[lane] = initial_seed + wl_pass1_delay; ++ } ++ } else { ++ uint8_t addr_prelaunch = 0; /* TODO: Fetch the correct value from RC2[0] */ ++ uint16_t register_delay; ++ int16_t seed_prescaling; ++ ++ memcpy(current_total_delay, dqs_ret_pass1_total_delay, sizeof(current_total_delay)); ++ if ((pDCTstat->Status & (1 << SB_Registered))) { ++ if (addr_prelaunch) ++ register_delay = 0x30; ++ else ++ register_delay = 0x20; ++ } else if ((pDCTstat->Status & (1 << SB_LoadReduced))) { ++ /* TODO ++ * Load reduced DIMM support unimplemented ++ */ ++ register_delay = 0x0; ++ } else { ++ register_delay = 0x0; ++ } ++ ++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) { ++ seed_prescaling = current_total_delay[lane] - register_delay - 0x20; ++ seed[lane] = (uint16_t) (register_delay + ((((uint64_t) seed_prescaling) * fam15h_freq_tab[mem_clk] * 100) / (mctGet_NVbits(NV_MIN_MEMCLK) * 100))); ++ } ++ } ++ ++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) { ++ seed_gross[lane] = (seed[lane] >> 5) & 0x1f; ++ seed_fine[lane] = seed[lane] & 0x1f; ++ ++ /*if (seed_gross[lane] == 0) ++ seed_pre_gross[lane] = 0; ++ else */if (seed_gross[lane] & 0x1) ++ seed_pre_gross[lane] = 1; ++ else ++ seed_pre_gross[lane] = 2; ++ ++ /* Calculate phase recovery delays */ ++ phase_recovery_delays[lane] = ((seed_pre_gross[lane] & 0x1f) << 5) | (seed_fine[lane] & 0x1f); ++ ++ /* Set the gross delay. ++ * NOTE: While the BKDG states to only program DqsRcvEnGrossDelay, this appears ++ * to have been a misprint as DqsRcvEnFineDelay should be set to zero as well. 
++ */ ++ current_total_delay[lane] = ((seed_gross[lane] & 0x1f) << 5); ++ } ++ ++ /* 2.10.5.8.2 (2) / 2.10.5.8.2.1 (5 6) ++ * Program PhRecFineDly and PhRecGrossDly ++ */ ++ write_dram_phase_recovery_control_registers(phase_recovery_delays, dev, Channel, dimm, index_reg); ++ ++ /* 2.10.5.8.2 (2) / 2.10.5.8.2.1 (7) ++ * Program the DQS Receiver Enable delay values for each lane ++ */ ++ write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg); ++ ++ /* 2.10.5.8.2 (3) ++ * Program DqsRcvTrEn = 1 ++ */ ++ dword = Get_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008); ++ dword |= (0x1 << 13); ++ Set_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008, dword); ++ ++ /* 2.10.5.8.2 (4) ++ * Issue 192 read requests to the target rank ++ */ ++ generate_dram_receiver_enable_training_pattern_fam15(pMCTstat, pDCTstat, Channel, Receiver + (rank & 0x1)); ++ ++ /* 2.10.5.8.2 (5) ++ * Program DqsRcvTrEn = 0 ++ */ ++ dword = Get_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008); ++ dword &= ~(0x1 << 13); ++ Set_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008, dword); ++ ++ /* 2.10.5.8.2 (6) ++ * Read PhRecGrossDly, PhRecFineDly ++ */ ++ read_dram_phase_recovery_control_registers(phase_recovery_delays, dev, Channel, dimm, index_reg); ++ ++ /* 2.10.5.8.2 (7) ++ * Calculate and program the DQS Receiver Enable delay values ++ */ ++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) { ++ current_total_delay[lane] = (phase_recovery_delays[lane] & 0x1f); ++ current_total_delay[lane] |= ((seed_gross[lane] + ((phase_recovery_delays[lane] >> 5) & 0x1f) - seed_pre_gross[lane] + 1) << 5); ++ if (lane == 8) ++ pDCTstat->CH_D_BC_RCVRDLY[Channel][dimm] = current_total_delay[lane]; ++ else ++ pDCTstat->CH_D_B_RCVRDLY[Channel][dimm][lane] = current_total_delay[lane]; ++ } ++ write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg); ++ ++ if (rank == 0) { ++ /* Back up the Rank 0 delays for later use */ ++ memcpy(rank0_current_total_delay, current_total_delay, sizeof(current_total_delay)); ++ } ++ ++ if (rank == 1) { ++ /* 2.10.5.8.2 (8) ++ * Compute the average delay across both ranks and program the result into ++ * the DQS Receiver Enable delay registers ++ */ ++ for (lane = 0; lane < MAX_BYTE_LANES; lane++) { ++ current_total_delay[lane] = (rank0_current_total_delay[lane] + current_total_delay[lane]) / 2; ++ if (lane == 8) ++ pDCTstat->CH_D_BC_RCVRDLY[Channel][dimm] = current_total_delay[lane]; ++ else ++ pDCTstat->CH_D_B_RCVRDLY[Channel][dimm][lane] = current_total_delay[lane]; ++ } ++ write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg); ++ } ++ } ++ ++#if DQS_TRAIN_DEBUG > 0 ++ for (lane = 0; lane < 8; lane++) ++ print_debug_dqs_pair("\t\tTrainRcvEn55: Lane ", lane, " current_total_delay ", current_total_delay[lane], 2); ++#endif ++ } ++ } ++ ++ /* Calculate and program MaxRdLatency */ ++ Calc_SetMaxRdLatency_D_Fam15(pMCTstat, pDCTstat, Channel); + + if(_DisableDramECC) { + mct_EnableDimmEccEn_D(pMCTstat, pDCTstat, _DisableDramECC); +@@ -674,10 +1486,10 @@ static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat) + } + + for (ch=0; ch<ch_end; ch++) { +- reg = 0x78 + 0x100 * ch; +- val = Get_NB32(dev, reg); ++ reg = 0x78; ++ val = Get_NB32_DCT(dev, ch, reg); + val &= ~(1 << DqsRcvEnTrain); +- Set_NB32(dev, reg, val); ++ Set_NB32_DCT(dev, ch, reg, val); + } + } + +@@ -718,7 +1530,7 @@ void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u16 RcvrEnDly, + /* get the 
register index from table */ + index = Table_DQSRcvEn_Offset[i >> 1]; + index += Addl_Index; /* DIMMx DqsRcvEn byte0 */ +- val = Get_NB32_index_wait(dev, index_reg, index); ++ val = Get_NB32_index_wait_DCT(dev, Channel, index_reg, index); + if(i & 1) { + /* odd byte lane */ + val &= ~(0x1ff << 16); +@@ -728,7 +1540,7 @@ void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u16 RcvrEnDly, + val &= ~0x1ff; + val |= (RcvrEnDly & 0x1ff); + } +- Set_NB32_index_wait(dev, index_reg, index, val); ++ Set_NB32_index_wait_DCT(dev, Channel, index_reg, index, val); + } + + } +@@ -742,7 +1554,6 @@ static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u16 D + u32 reg; + u32 SubTotal; + u32 index_reg; +- u32 reg_off; + u32 val; + + uint8_t cpu_val_n; +@@ -777,17 +1588,16 @@ static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u16 D + Channel = 0; + + dev = pDCTstat->dev_dct; +- reg_off = 0x100 * Channel; +- index_reg = 0x98 + reg_off; ++ index_reg = 0x98; + + /* Multiply the CAS Latency by two to get a number of 1/2 MEMCLKs units.*/ +- val = Get_NB32(dev, 0x88 + reg_off); ++ val = Get_NB32_DCT(dev, Channel, 0x88); + SubTotal = ((val & 0x0f) + 4) << 1; /* SubTotal is 1/2 Memclk unit */ + + /* If registered DIMMs are being used then + * add 1 MEMCLK to the sub-total. + */ +- val = Get_NB32(dev, 0x90 + reg_off); ++ val = Get_NB32_DCT(dev, Channel, 0x90); + if(!(val & (1 << UnBuffDimm))) + SubTotal += 2; + +@@ -795,7 +1605,7 @@ static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u16 D + * add 1, else add 2 to the sub-total. + * if (AddrCmdSetup || CsOdtSetup || CkeSetup) then K := K + 2; + */ +- val = Get_NB32_index_wait(dev, index_reg, 0x04); ++ val = Get_NB32_index_wait_DCT(dev, Channel, index_reg, 0x04); + if(!(val & 0x00202020)) + SubTotal += 1; + else +@@ -803,7 +1613,7 @@ static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u16 D + + /* If the F2x[1, 0]78[RdPtrInit] field is 4, 5, 6 or 7 MEMCLKs, + * then add 4, 3, 2, or 1 MEMCLKs, respectively to the sub-total. */ +- val = Get_NB32(dev, 0x78 + reg_off); ++ val = Get_NB32_DCT(dev, Channel, 0x78); + SubTotal += 8 - (val & 0x0f); + + /* Convert bits 7-5 (also referred to as the coarse delay) of +@@ -824,7 +1634,7 @@ static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u16 D + * clocks (NCLKs) + */ + SubTotal *= 200 * ((Get_NB32(pDCTstat->dev_nbmisc, 0xd4) & 0x1f) + 4); +- SubTotal /= freq_tab[((Get_NB32(pDCTstat->dev_dct, 0x94 + reg_off) & 0x7) - 3)]; ++ SubTotal /= freq_tab[((Get_NB32_DCT(pDCTstat->dev_dct, Channel, 0x94) & 0x7) - 3)]; + SubTotal = (SubTotal + (2 - 1)) / 2; /* Round up */ + + /* Add "N" NCLKs to the sub-total. "N" represents part of the +@@ -841,13 +1651,13 @@ static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u16 D + /* Program the F2x[1, 0]78[MaxRdLatency] register with + * the total delay value (in NCLKs). 
+ */ +- reg = 0x78 + reg_off; +- val = Get_NB32(dev, reg); ++ reg = 0x78; ++ val = Get_NB32_DCT(dev, Channel, reg); + val &= ~(0x3ff << 22); + val |= (SubTotal & 0x3ff) << 22; + + /* program MaxRdLatency to correspond with current delay */ +- Set_NB32(dev, reg, val); ++ Set_NB32_DCT(dev, Channel, reg, val); + } + + static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat, +@@ -877,7 +1687,7 @@ static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat, + u32 dword; + u8 dn = 4; /* TODO: Rev C could be 4 */ + u32 dev = pDCTstat->dev_dct; +- u32 index_reg = 0x98 + 0x100 * Channel; ++ u32 index_reg = 0x98; + + /* FIXME: add Cx support */ + dword = 0x00000000; +@@ -885,7 +1695,7 @@ static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat, + for(j=0; j<dn; j++) + /* DIMM0 Write Data Timing Low */ + /* DIMM0 Write ECC Timing */ +- Set_NB32_index_wait(dev, index_reg, i + 0x100 * j, dword); ++ Set_NB32_index_wait_DCT(dev, Channel, index_reg, i + 0x100 * j, dword); + } + + /* errata #180 */ +@@ -893,13 +1703,13 @@ static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat, + for(i=5; i<=6; i++) { + for(j=0; j<dn; j++) + /* DIMM0 Read DQS Timing Control Low */ +- Set_NB32_index_wait(dev, index_reg, i + 0x100 * j, dword); ++ Set_NB32_index_wait_DCT(dev, Channel, index_reg, i + 0x100 * j, dword); + } + + dword = 0x0000002f; + for(j=0; j<dn; j++) + /* DIMM0 Read DQS ECC Timing Control */ +- Set_NB32_index_wait(dev, index_reg, 7 + 0x100 * j, dword); ++ Set_NB32_index_wait_DCT(dev, Channel, index_reg, 7 + 0x100 * j, dword); + } + + void SetEccDQSRcvrEn_D(struct DCTStatStruc *pDCTstat, u8 Channel) +@@ -912,13 +1722,13 @@ void SetEccDQSRcvrEn_D(struct DCTStatStruc *pDCTstat, u8 Channel) + u32 val; + + dev = pDCTstat->dev_dct; +- index_reg = 0x98 + Channel * 0x100; ++ index_reg = 0x98; + index = 0x12; + p = pDCTstat->CH_D_BC_RCVRDLY[Channel]; + print_debug_dqs("\t\tSetEccDQSRcvrPos: Channel ", Channel, 2); + for(ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) { + val = p[ChipSel>>1]; +- Set_NB32_index_wait(dev, index_reg, index, val); ++ Set_NB32_index_wait_DCT(dev, Channel, index_reg, index, val); + print_debug_dqs_pair("\t\tSetEccDQSRcvrPos: ChipSel ", + ChipSel, " rcvr_delay ", val, 2); + index += 3; +@@ -1002,95 +1812,305 @@ void phyAssistedMemFnceTraining(struct MCTStatStruc *pMCTstat, + u8 Node = 0; + struct DCTStatStruc *pDCTstat; + ++ printk(BIOS_DEBUG, "%s: Start\n", __func__); ++ + /* FIXME: skip for Ax */ +- while (Node < MAX_NODES_SUPPORTED) { ++ for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) { + pDCTstat = pDCTstatA + Node; ++ if (!pDCTstat->NodePresent) ++ continue; ++ ++ if (pDCTstat->DCTSysLimit) { ++ if (is_fam15h()) { ++ /* Fam15h BKDG v3.14 section 2.10.5.3.3 ++ * This picks up where InitDDRPhy left off ++ */ ++ uint8_t dct; ++ uint8_t index; ++ uint32_t dword; ++ uint32_t datc_backup; ++ uint32_t training_dword; ++ uint32_t fence2_config_dword; ++ uint32_t fence_tx_pad_config_dword; ++ uint32_t index_reg = 0x98; ++ uint32_t dev = pDCTstat->dev_dct; ++ ++ for (dct = 0; dct < 2; dct++) { ++ if (!pDCTstat->DIMMValidDCT[dct]) ++ continue; ++ ++ /* Back up D18F2x9C_x0000_0004_dct[1:0] */ ++ datc_backup = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000004); ++ ++ /* FenceTrSel = 0x2 */ ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000008); ++ dword &= ~(0x3 << 6); ++ dword |= (0x2 << 6); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000008, dword); ++ ++ /* Set phase recovery seed values */ ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 
0x00000050, 0x13131313); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000051, 0x13131313); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000052, 0x00000013); ++ ++ training_dword = fenceDynTraining_D(pMCTstat, pDCTstat, dct); ++ ++ /* Save calculated fence value to the TX DLL */ ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000c); ++ dword &= ~(0x1f << 26); ++ dword |= ((training_dword & 0x1f) << 26); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000c, dword); ++ ++ /* D18F2x9C_x0D0F_0[F,8:0]0F_dct[1:0][AlwaysEnDllClks]=0x1 */ ++ for (index = 0; index < 0x9; index++) { ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f000f | (index << 8)); ++ dword &= ~(0x7 << 12); ++ dword |= (0x1 << 12); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f000f | (index << 8), dword); ++ } ++ ++ /* FenceTrSel = 0x1 */ ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000008); ++ dword &= ~(0x3 << 6); ++ dword |= (0x1 << 6); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000008, dword); ++ ++ /* Set phase recovery seed values */ ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000050, 0x13131313); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000051, 0x13131313); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000052, 0x00000013); ++ ++ training_dword = fenceDynTraining_D(pMCTstat, pDCTstat, dct); ++ ++ /* Save calculated fence value to the RX DLL */ ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000c); ++ dword &= ~(0x1f << 21); ++ dword |= ((training_dword & 0x1f) << 21); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000c, dword); ++ ++ /* D18F2x9C_x0D0F_0[F,8:0]0F_dct[1:0][AlwaysEnDllClks]=0x0 */ ++ for (index = 0; index < 0x9; index++) { ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f000f | (index << 8)); ++ dword &= ~(0x7 << 12); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f000f | (index << 8), dword); ++ } ++ ++ /* FenceTrSel = 0x3 */ ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000008); ++ dword &= ~(0x3 << 6); ++ dword |= (0x3 << 6); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000008, dword); ++ ++ /* Set phase recovery seed values */ ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000050, 0x13131313); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000051, 0x13131313); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000052, 0x00000013); ++ ++ fence_tx_pad_config_dword = fenceDynTraining_D(pMCTstat, pDCTstat, dct); ++ ++ /* Save calculated fence value to the TX Pad */ ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000c); ++ dword &= ~(0x1f << 16); ++ dword |= ((fence_tx_pad_config_dword & 0x1f) << 16); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000c, dword); ++ ++ /* Program D18F2x9C_x0D0F_[C,8,2][2:0]31_dct[1:0] */ ++ training_dword = fence_tx_pad_config_dword; ++ if (fence_tx_pad_config_dword < 16) ++ training_dword |= (0x1 << 4); ++ else ++ training_dword = 0; ++ for (index = 0; index < 0x3; index++) { ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f2031 | (index << 8)); ++ dword &= ~(0x1f); ++ dword |= (training_dword & 0x1f); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f2031 | (index << 8), dword); ++ } ++ for (index = 0; index < 0x3; index++) { ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f8031 | (index << 8)); ++ dword &= ~(0x1f); ++ dword |= (training_dword & 0x1f); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f8031 | (index << 8), dword); ++ } ++ for (index = 
0; index < 0x3; index++) { ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fc031 | (index << 8)); ++ dword &= ~(0x1f); ++ dword |= (training_dword & 0x1f); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fc031 | (index << 8), dword); ++ } ++ ++ /* Assemble Fence2 configuration word (Fam15h BKDG v3.14 page 331) */ ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000c); ++ fence2_config_dword = 0; ++ ++ /* TxPad */ ++ training_dword = (dword >> 16) & 0x1f; ++ if (training_dword < 16) ++ training_dword |= 0x10; ++ else ++ training_dword = 0; ++ fence2_config_dword |= training_dword; ++ ++ /* RxDll */ ++ training_dword = (dword >> 21) & 0x1f; ++ if (training_dword < 16) ++ training_dword |= 0x10; ++ else ++ training_dword = 0; ++ fence2_config_dword |= (training_dword << 10); ++ ++ /* TxDll */ ++ training_dword = (dword >> 26) & 0x1f; ++ if (training_dword < 16) ++ training_dword |= 0x10; ++ else ++ training_dword = 0; ++ fence2_config_dword |= (training_dword << 5); ++ ++ /* Program D18F2x9C_x0D0F_0[F,8:0]31_dct[1:0] */ ++ for (index = 0; index < 0x9; index++) { ++ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0031 | (index << 8)); ++ dword &= ~(0x7fff); ++ dword |= fence2_config_dword; ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0031 | (index << 8), dword); ++ } + +- if(pDCTstat->DCTSysLimit) { +- fenceDynTraining_D(pMCTstat, pDCTstat, 0); +- fenceDynTraining_D(pMCTstat, pDCTstat, 1); ++ /* Restore D18F2x9C_x0000_0004_dct[1:0] */ ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000004, datc_backup); ++ } ++ } else { ++ fenceDynTraining_D(pMCTstat, pDCTstat, 0); ++ fenceDynTraining_D(pMCTstat, pDCTstat, 1); ++ } + } +- Node++; + } ++ ++ printk(BIOS_DEBUG, "%s: Done\n", __func__); + } + +-static void fenceDynTraining_D(struct MCTStatStruc *pMCTstat, ++static uint32_t fenceDynTraining_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct) + { + u16 avRecValue; + u32 val; + u32 dev; +- u32 index_reg = 0x98 + 0x100 * dct; ++ u32 index_reg = 0x98; + u32 index; + +- /* BIOS first programs a seed value to the phase recovery engine +- * (recommended 19) registers. +- * Dram Phase Recovery Control Register (F2x[1,0]9C_x[51:50] and +- * F2x[1,0]9C_x52.) . +- */ + dev = pDCTstat->dev_dct; +- for (index = 0x50; index <= 0x52; index ++) { +- val = (FenceTrnFinDlySeed & 0x1F); +- if (index != 0x52) { +- val |= val << 8 | val << 16 | val << 24; ++ ++ if (is_fam15h()) { ++ /* Set F2x[1,0]9C_x08[PhyFenceTrEn] */ ++ val = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x08); ++ val |= 1 << PhyFenceTrEn; ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x08, val); ++ ++ /* Wait 2000 MEMCLKs */ ++ precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 2000); ++ ++ /* Clear F2x[1,0]9C_x08[PhyFenceTrEn] */ ++ val = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x08); ++ val &= ~(1 << PhyFenceTrEn); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x08, val); ++ ++ /* BIOS reads the phase recovery engine registers ++ * F2x[1,0]9C_x[51:50] and F2x[1,0]9C_x52. ++ * Average the fine delay components only. ++ */ ++ avRecValue = 0; ++ for (index = 0x50; index <= 0x52; index++) { ++ val = Get_NB32_index_wait_DCT(dev, dct, index_reg, index); ++ avRecValue += val & 0x1f; ++ if (index != 0x52) { ++ avRecValue += (val >> 8) & 0x1f; ++ avRecValue += (val >> 16) & 0x1f; ++ avRecValue += (val >> 24) & 0x1f; ++ } + } +- Set_NB32_index_wait(dev, index_reg, index, val); +- } + +- /* Set F2x[1,0]9C_x08[PhyFenceTrEn]=1. 
*/ +- val = Get_NB32_index_wait(dev, index_reg, 0x08); +- val |= 1 << PhyFenceTrEn; +- Set_NB32_index_wait(dev, index_reg, 0x08, val); +- +- /* Wait 200 MEMCLKs. */ +- mct_Wait(50000); /* wait 200us */ +- +- /* Clear F2x[1,0]9C_x08[PhyFenceTrEn]=0. */ +- val = Get_NB32_index_wait(dev, index_reg, 0x08); +- val &= ~(1 << PhyFenceTrEn); +- Set_NB32_index_wait(dev, index_reg, 0x08, val); +- +- /* BIOS reads the phase recovery engine registers +- * F2x[1,0]9C_x[51:50] and F2x[1,0]9C_x52. */ +- avRecValue = 0; +- for (index = 0x50; index <= 0x52; index ++) { +- val = Get_NB32_index_wait(dev, index_reg, index); +- avRecValue += val & 0x7F; +- if (index != 0x52) { +- avRecValue += (val >> 8) & 0x7F; +- avRecValue += (val >> 16) & 0x7F; +- avRecValue += (val >> 24) & 0x7F; ++ val = avRecValue / 9; ++ if (avRecValue % 9) ++ val++; ++ avRecValue = val; ++ ++ if (avRecValue < 6) ++ avRecValue = 0; ++ else ++ avRecValue -= 6; ++ ++ return avRecValue; ++ } else { ++ /* BIOS first programs a seed value to the phase recovery engine ++ * (recommended 19) registers. ++ * Dram Phase Recovery Control Register (F2x[1,0]9C_x[51:50] and ++ * F2x[1,0]9C_x52.) . ++ */ ++ for (index = 0x50; index <= 0x52; index ++) { ++ val = (FenceTrnFinDlySeed & 0x1F); ++ if (index != 0x52) { ++ val |= val << 8 | val << 16 | val << 24; ++ } ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, index, val); + } +- } + +- val = avRecValue / 9; +- if (avRecValue % 9) +- val++; +- avRecValue = val; ++ /* Set F2x[1,0]9C_x08[PhyFenceTrEn]=1. */ ++ val = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x08); ++ val |= 1 << PhyFenceTrEn; ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x08, val); ++ ++ /* Wait 200 MEMCLKs. */ ++ mct_Wait(50000); /* wait 200us */ ++ ++ /* Clear F2x[1,0]9C_x08[PhyFenceTrEn]=0. */ ++ val = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x08); ++ val &= ~(1 << PhyFenceTrEn); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x08, val); ++ ++ /* BIOS reads the phase recovery engine registers ++ * F2x[1,0]9C_x[51:50] and F2x[1,0]9C_x52. */ ++ avRecValue = 0; ++ for (index = 0x50; index <= 0x52; index ++) { ++ val = Get_NB32_index_wait_DCT(dev, dct, index_reg, index); ++ avRecValue += val & 0x7F; ++ if (index != 0x52) { ++ avRecValue += (val >> 8) & 0x7F; ++ avRecValue += (val >> 16) & 0x7F; ++ avRecValue += (val >> 24) & 0x7F; ++ } ++ } + +- /* Write the (averaged value -8) to F2x[1,0]9C_x0C[PhyFence]. */ +- /* inlined mct_AdjustFenceValue() */ +- /* TODO: The RBC0 is not supported. */ +- /* if (pDCTstat->LogicalCPUID & AMD_RB_C0) +- avRecValue -= 3; +- else +- */ +- if (pDCTstat->LogicalCPUID & AMD_DR_Dx) +- avRecValue -= 8; +- else if (pDCTstat->LogicalCPUID & AMD_DR_Cx) +- avRecValue -= 8; +- else if (pDCTstat->LogicalCPUID & AMD_DR_Bx) +- avRecValue -= 8; +- +- val = Get_NB32_index_wait(dev, index_reg, 0x0C); +- val &= ~(0x1F << 16); +- val |= (avRecValue & 0x1F) << 16; +- Set_NB32_index_wait(dev, index_reg, 0x0C, val); +- +- /* Rewrite F2x[1,0]9C_x04-DRAM Address/Command Timing Control Register +- * delays (both channels). */ +- val = Get_NB32_index_wait(dev, index_reg, 0x04); +- Set_NB32_index_wait(dev, index_reg, 0x04, val); ++ val = avRecValue / 9; ++ if (avRecValue % 9) ++ val++; ++ avRecValue = val; ++ ++ /* Write the (averaged value -8) to F2x[1,0]9C_x0C[PhyFence]. */ ++ /* inlined mct_AdjustFenceValue() */ ++ /* TODO: The RBC0 is not supported. 
*/ ++ /* if (pDCTstat->LogicalCPUID & AMD_RB_C0) ++ avRecValue -= 3; ++ else ++ */ ++ if (pDCTstat->LogicalCPUID & AMD_DR_Dx) ++ avRecValue -= 8; ++ else if (pDCTstat->LogicalCPUID & AMD_DR_Cx) ++ avRecValue -= 8; ++ else if (pDCTstat->LogicalCPUID & AMD_DR_Bx) ++ avRecValue -= 8; ++ ++ val = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0C); ++ val &= ~(0x1F << 16); ++ val |= (avRecValue & 0x1F) << 16; ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0C, val); ++ ++ /* Rewrite F2x[1,0]9C_x04-DRAM Address/Command Timing Control Register ++ * delays (both channels). ++ */ ++ val = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x04); ++ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x04, val); ++ ++ return avRecValue; ++ } + } + + void mct_Wait(u32 cycles) +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc1p.c b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc1p.c +index f01e011..55068ce 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc1p.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc1p.c +@@ -21,8 +21,14 @@ + u8 mct_checkNumberOfDqsRcvEn_1Pass(u8 pass) + { + u8 ret = 1; +- if (pass == SecondPass) +- ret = 0; ++ ++ if (is_fam15h()) { ++ /* Fam15h needs two passes */ ++ ret = 1; ++ } else { ++ if (pass == SecondPass) ++ ret = 0; ++ } + + return ret; + } +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mcttmrl.c b/src/northbridge/amd/amdmct/mct_ddr3/mcttmrl.c +index 920f514..68acc75 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mcttmrl.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mcttmrl.c +@@ -218,12 +218,12 @@ static void mct_setMaxRdLatTrnVal_D(struct DCTStatStruc *pDCTstat, + } + + dev = pDCTstat->dev_dct; +- reg = 0x78 + Channel * 0x100; +- val = Get_NB32(dev, reg); ++ reg = 0x78; ++ val = Get_NB32_DCT(dev, Channel, reg); + val &= ~(0x3ff<<22); + val |= MaxRdLatVal<<22; + /* program MaxRdLatency to correspond with current delay */ +- Set_NB32(dev, reg, val); ++ Set_NB32_DCT(dev, Channel, reg, val); + } + + static u8 CompareMaxRdLatTestPattern_D(u32 pattern_buf, u32 addr) +@@ -320,30 +320,28 @@ u8 mct_GetStartMaxRdLat_D(struct MCTStatStruc *pMCTstat, + u32 valx; + u32 valxx; + u32 index_reg; +- u32 reg_off; + u32 dev; + + if(pDCTstat->GangedMode) + Channel = 0; + +- index_reg = 0x98 + 0x100 * Channel; ++ index_reg = 0x98; + +- reg_off = 0x100 * Channel; + dev = pDCTstat->dev_dct; + + /* Multiply the CAS Latency by two to get a number of 1/2 MEMCLKs units.*/ +- val = Get_NB32(dev, 0x88 + reg_off); ++ val = Get_NB32_DCT(dev, Channel, 0x88); + SubTotal = ((val & 0x0f) + 1) << 1; /* SubTotal is 1/2 Memclk unit */ + + /* If registered DIMMs are being used then add 1 MEMCLK to the sub-total*/ +- val = Get_NB32(dev, 0x90 + reg_off); ++ val = Get_NB32_DCT(dev, Channel, 0x90); + if(!(val & (1 << UnBuffDimm))) + SubTotal += 2; + + /*If the address prelaunch is setup for 1/2 MEMCLKs then add 1, + * else add 2 to the sub-total. if (AddrCmdSetup || CsOdtSetup + * || CkeSetup) then K := K + 2; */ +- val = Get_NB32_index_wait(dev, index_reg, 0x04); ++ val = Get_NB32_index_wait_DCT(dev, Channel, index_reg, 0x04); + if(!(val & 0x00202020)) + SubTotal += 1; + else +@@ -351,7 +349,7 @@ u8 mct_GetStartMaxRdLat_D(struct MCTStatStruc *pMCTstat, + + /* If the F2x[1, 0]78[RdPtrInit] field is 4, 5, 6 or 7 MEMCLKs, + * then add 4, 3, 2, or 1 MEMCLKs, respectively to the sub-total. 
*/ +- val = Get_NB32(dev, 0x78 + reg_off); ++ val = Get_NB32_DCT(dev, Channel, 0x78); + SubTotal += 8 - (val & 0x0f); + + /* Convert bits 7-5 (also referred to as the course delay) of the current +@@ -367,7 +365,7 @@ u8 mct_GetStartMaxRdLat_D(struct MCTStatStruc *pMCTstat, + + /*New formula: + SubTotal *= 3*(Fn2xD4[NBFid]+4)/(3+Fn2x94[MemClkFreq])/2 */ +- val = Get_NB32(dev, 0x94 + reg_off); ++ val = Get_NB32_DCT(dev, Channel, 0x94); + /* SubTotal div 4 to scale 1/4 MemClk back to MemClk */ + val &= 7; + if (val >= 3) { +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c b/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c +index 1c3e322..0ff4484 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c +@@ -83,6 +83,12 @@ void PrepareC_DCT(struct MCTStatStruc *pMCTstat, + pDCTstat->C_DCTPtr[dct]->Status[DCT_STATUS_REGISTERED] = 0; + } + ++ if (pDCTstat->Status & (1 << SB_LoadReduced)) { ++ pDCTstat->C_DCTPtr[dct]->Status[DCT_STATUS_LOAD_REDUCED] = 1; ++ } else { ++ pDCTstat->C_DCTPtr[dct]->Status[DCT_STATUS_LOAD_REDUCED] = 0; ++ } ++ + pDCTstat->C_DCTPtr[dct]->RegMan1Present = pDCTstat->RegMan1Present; + + for (dimm = 0; dimm < MAX_TOTAL_DIMMS; dimm++) { +@@ -103,13 +109,13 @@ void EnableZQcalibration(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDC + { + u32 val; + +- val = Get_NB32(pDCTstat->dev_dct, 0x94); ++ val = Get_NB32_DCT(pDCTstat->dev_dct, 0, 0x94); + val |= 1 << 11; +- Set_NB32(pDCTstat->dev_dct, 0x94, val); ++ Set_NB32_DCT(pDCTstat->dev_dct, 0, 0x94, val); + +- val = Get_NB32(pDCTstat->dev_dct, 0x94 + 0x100); ++ val = Get_NB32_DCT(pDCTstat->dev_dct, 1, 0x94); + val |= 1 << 11; +- Set_NB32(pDCTstat->dev_dct, 0x94 + 0x100, val); ++ Set_NB32_DCT(pDCTstat->dev_dct, 1, 0x94, val); + } + + void DisableZQcalibration(struct MCTStatStruc *pMCTstat, +@@ -117,15 +123,15 @@ void DisableZQcalibration(struct MCTStatStruc *pMCTstat, + { + u32 val; + +- val = Get_NB32(pDCTstat->dev_dct, 0x94); ++ val = Get_NB32_DCT(pDCTstat->dev_dct, 0, 0x94); + val &= ~(1 << 11); + val &= ~(1 << 10); +- Set_NB32(pDCTstat->dev_dct, 0x94, val); ++ Set_NB32_DCT(pDCTstat->dev_dct, 0, 0x94, val); + +- val = Get_NB32(pDCTstat->dev_dct, 0x94 + 0x100); ++ val = Get_NB32_DCT(pDCTstat->dev_dct, 1, 0x94); + val &= ~(1 << 11); + val &= ~(1 << 10); +- Set_NB32(pDCTstat->dev_dct, 0x94 + 0x100, val); ++ Set_NB32_DCT(pDCTstat->dev_dct, 1, 0x94, val); + } + + static void EnterSelfRefresh(struct MCTStatStruc *pMCTstat, +@@ -142,23 +148,23 @@ static void EnterSelfRefresh(struct MCTStatStruc *pMCTstat, + + /* Program F2x[1, 0]90[EnterSelfRefresh]=1. */ + if (DCT0Present) { +- val = Get_NB32(pDCTstat->dev_dct, 0x90); ++ val = Get_NB32_DCT(pDCTstat->dev_dct, 0, 0x90); + val |= 1 << EnterSelfRef; +- Set_NB32(pDCTstat->dev_dct, 0x90, val); ++ Set_NB32_DCT(pDCTstat->dev_dct, 0, 0x90, val); + } + if (DCT1Present) { +- val = Get_NB32(pDCTstat->dev_dct, 0x90 + 0x100); ++ val = Get_NB32_DCT(pDCTstat->dev_dct, 1, 0x90); + val |= 1 << EnterSelfRef; +- Set_NB32(pDCTstat->dev_dct, 0x90 + 0x100, val); ++ Set_NB32_DCT(pDCTstat->dev_dct, 1, 0x90, val); + } + /* Wait until the hardware resets F2x[1, 0]90[EnterSelfRefresh]=0. 
*/ + if (DCT0Present) + do { +- val = Get_NB32(pDCTstat->dev_dct, 0x90); ++ val = Get_NB32_DCT(pDCTstat->dev_dct, 0, 0x90); + } while (val & (1 <<EnterSelfRef)); + if (DCT1Present) + do { +- val = Get_NB32(pDCTstat->dev_dct, 0x90 + 0x100); ++ val = Get_NB32_DCT(pDCTstat->dev_dct, 1, 0x90); + } while (val & (1 <<EnterSelfRef)); + } + +@@ -168,8 +174,11 @@ static void EnterSelfRefresh(struct MCTStatStruc *pMCTstat, + static void ChangeMemClk(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat) + { +- u8 DCT0Present, DCT1Present; +- u32 val; ++ uint8_t DCT0Present; ++ uint8_t DCT1Present; ++ uint32_t dword; ++ uint32_t mask; ++ uint32_t offset; + + DCT0Present = pDCTstat->DIMMValidDCT[0]; + if (pDCTstat->GangedMode) +@@ -177,76 +186,134 @@ static void ChangeMemClk(struct MCTStatStruc *pMCTstat, + else + DCT1Present = pDCTstat->DIMMValidDCT[1]; + +- /* Program F2x[1, 0]90[EnterSelfRefresh]=1. */ +- if (DCT0Present) { +- val = Get_NB32_index_wait(pDCTstat->dev_dct, 0x98, 8); +- val |= 1 << DisAutoComp; +- Set_NB32_index_wait(pDCTstat->dev_dct, 0x98, 8, val); +- } +- if (DCT1Present) { +- val = Get_NB32_index_wait(pDCTstat->dev_dct, 0x98 + 0x100, 8); +- val |= 1 << DisAutoComp; +- Set_NB32_index_wait(pDCTstat->dev_dct, 0x98 + 0x100, 8, val); ++ if (is_fam15h()) { ++ /* Program D18F2x9C_x0D0F_E006_dct[1:0][PllLockTime] = 0x190 */ ++ if (DCT0Present) { ++ dword = Get_NB32_index_wait_DCT(pDCTstat->dev_dct, 0, 0x98, 0x0d0fe006); ++ dword &= ~(0x0000ffff); ++ dword |= 0x00000190; ++ Set_NB32_index_wait_DCT(pDCTstat->dev_dct, 0, 0x98, 0x0d0fe006, dword); ++ } ++ if (DCT1Present) { ++ dword = Get_NB32_index_wait_DCT(pDCTstat->dev_dct, 1, 0x98, 0x0d0fe006); ++ dword &= ~(0x0000ffff); ++ dword |= 0x00000190; ++ Set_NB32_index_wait_DCT(pDCTstat->dev_dct, 1, 0x98, 0x0d0fe006, dword); ++ } ++ } else { ++ /* Program F2x[1, 0]9C[DisAutoComp]=1. */ ++ if (DCT0Present) { ++ dword = Get_NB32_index_wait_DCT(pDCTstat->dev_dct, 0, 0x98, 8); ++ dword |= 1 << DisAutoComp; ++ Set_NB32_index_wait_DCT(pDCTstat->dev_dct, 0, 0x98, 8, dword); ++ mct_Wait(100); /* Wait for 5us */ ++ } ++ if (DCT1Present) { ++ dword = Get_NB32_index_wait_DCT(pDCTstat->dev_dct, 1, 0x98, 8); ++ dword |= 1 << DisAutoComp; ++ Set_NB32_index_wait_DCT(pDCTstat->dev_dct, 1, 0x98, 8, dword); ++ mct_Wait(100); /* Wait for 5us */ ++ } + } + + /* Program F2x[1, 0]94[MemClkFreqVal] = 0. */ + if (DCT0Present) { +- val = Get_NB32(pDCTstat->dev_dct, 0x94); +- val &= ~(1 << MemClkFreqVal); +- Set_NB32(pDCTstat->dev_dct, 0x94, val); ++ dword = Get_NB32_DCT(pDCTstat->dev_dct, 0, 0x94); ++ dword &= ~(1 << MemClkFreqVal); ++ Set_NB32_DCT(pDCTstat->dev_dct, 0, 0x94, dword); + } + if (DCT1Present) { +- val = Get_NB32(pDCTstat->dev_dct, 0x94 + 0x100); +- val &= ~(1 << MemClkFreqVal); +- Set_NB32(pDCTstat->dev_dct, 0x94 + 0x100, val); ++ dword = Get_NB32_DCT(pDCTstat->dev_dct, 1, 0x94); ++ dword &= ~(1 << MemClkFreqVal); ++ Set_NB32_DCT(pDCTstat->dev_dct, 1, 0x94, dword); + } + + /* Program F2x[1, 0]94[MemClkFreq] to specify the target MEMCLK frequency. 
*/ ++ if (is_fam15h()) { ++ offset = 0x0; ++ mask = 0x1f; ++ } else { ++ offset = 0x1; ++ mask = 0x7; ++ } + if (DCT0Present) { +- val = Get_NB32(pDCTstat->dev_dct, 0x94); +- val &= 0xFFFFFFF8; +- val |= pDCTstat->TargetFreq - 1; +- Set_NB32(pDCTstat->dev_dct, 0x94, val); ++ dword = Get_NB32_DCT(pDCTstat->dev_dct, 0, 0x94); ++ dword &= ~mask; ++ dword |= (pDCTstat->TargetFreq - offset) & mask; ++ Set_NB32_DCT(pDCTstat->dev_dct, 0, 0x94, dword); + } + if (DCT1Present) { +- val = Get_NB32(pDCTstat->dev_dct, 0x94 + 0x100); +- val &= 0xFFFFFFF8; +- val |= pDCTstat->TargetFreq - 1; +- Set_NB32(pDCTstat->dev_dct, 0x94 + 0x100, val); ++ dword = Get_NB32_DCT(pDCTstat->dev_dct, 1, 0x94); ++ dword &= ~mask; ++ dword |= (pDCTstat->TargetFreq - offset) & mask; ++ Set_NB32_DCT(pDCTstat->dev_dct, 1, 0x94, dword); ++ } ++ ++ if (is_fam15h()) { ++ if (DCT0Present) { ++ mctGet_PS_Cfg_D(pMCTstat, pDCTstat, 0); ++ set_2t_configuration(pMCTstat, pDCTstat, 0); ++ mct_BeforePlatformSpec(pMCTstat, pDCTstat, 0); ++ mct_PlatformSpec(pMCTstat, pDCTstat, 0); ++ } ++ if (DCT1Present) { ++ mctGet_PS_Cfg_D(pMCTstat, pDCTstat, 1); ++ set_2t_configuration(pMCTstat, pDCTstat, 1); ++ mct_BeforePlatformSpec(pMCTstat, pDCTstat, 1); ++ mct_PlatformSpec(pMCTstat, pDCTstat, 1); ++ } + } + + /* Program F2x[1, 0]94[MemClkFreqVal] = 1. */ + if (DCT0Present) { +- val = Get_NB32(pDCTstat->dev_dct, 0x94); +- val |= 1 << MemClkFreqVal; +- Set_NB32(pDCTstat->dev_dct, 0x94, val); ++ dword = Get_NB32_DCT(pDCTstat->dev_dct, 0, 0x94); ++ dword |= 1 << MemClkFreqVal; ++ Set_NB32_DCT(pDCTstat->dev_dct, 0, 0x94, dword); + } + if (DCT1Present) { +- val = Get_NB32(pDCTstat->dev_dct, 0x94 + 0x100); +- val |= 1 << MemClkFreqVal; +- Set_NB32(pDCTstat->dev_dct, 0x94 + 0x100, val); ++ dword = Get_NB32_DCT(pDCTstat->dev_dct, 1, 0x94); ++ dword |= 1 << MemClkFreqVal; ++ Set_NB32_DCT(pDCTstat->dev_dct, 1, 0x94, dword); + } + + /* Wait until F2x[1, 0]94[FreqChgInProg]=0. */ + if (DCT0Present) + do { +- val = Get_NB32(pDCTstat->dev_dct, 0x94); +- } while (val & (1 << FreqChgInProg)); ++ dword = Get_NB32_DCT(pDCTstat->dev_dct, 0, 0x94); ++ } while (dword & (1 << FreqChgInProg)); + if (DCT1Present) + do { +- val = Get_NB32(pDCTstat->dev_dct, 0x94 + 0x100); +- } while (val & (1 << FreqChgInProg)); +- +- /* Program F2x[1, 0]94[MemClkFreqVal] = 0. */ +- if (DCT0Present) { +- val = Get_NB32_index_wait(pDCTstat->dev_dct, 0x98, 8); +- val &= ~(1 << DisAutoComp); +- Set_NB32_index_wait(pDCTstat->dev_dct, 0x98, 8, val); +- } +- if (DCT1Present) { +- val = Get_NB32_index_wait(pDCTstat->dev_dct, 0x98 + 0x100, 8); +- val &= ~(1 << DisAutoComp); +- Set_NB32_index_wait(pDCTstat->dev_dct, 0x98 + 0x100, 8, val); ++ dword = Get_NB32_DCT(pDCTstat->dev_dct, 1, 0x94); ++ } while (dword & (1 << FreqChgInProg)); ++ ++ if (is_fam15h()) { ++ /* Program D18F2x9C_x0D0F_E006_dct[1:0][PllLockTime] = 0xf */ ++ if (DCT0Present) { ++ dword = Get_NB32_index_wait_DCT(pDCTstat->dev_dct, 0, 0x98, 0x0d0fe006); ++ dword &= ~(0x0000ffff); ++ dword |= 0x0000000f; ++ Set_NB32_index_wait_DCT(pDCTstat->dev_dct, 0, 0x98, 0x0d0fe006, dword); ++ } ++ if (DCT1Present) { ++ dword = Get_NB32_index_wait_DCT(pDCTstat->dev_dct, 1, 0x98, 0x0d0fe006); ++ dword &= ~(0x0000ffff); ++ dword |= 0x0000000f; ++ Set_NB32_index_wait_DCT(pDCTstat->dev_dct, 1, 0x98, 0x0d0fe006, dword); ++ } ++ } else { ++ /* Program F2x[1, 0]9C[DisAutoComp] = 0. 
*/ ++ if (DCT0Present) { ++ dword = Get_NB32_index_wait_DCT(pDCTstat->dev_dct, 0, 0x98, 8); ++ dword &= ~(1 << DisAutoComp); ++ Set_NB32_index_wait_DCT(pDCTstat->dev_dct, 0, 0x98, 8, dword); ++ mct_Wait(15000); /* Wait for 750us */ ++ } ++ if (DCT1Present) { ++ dword = Get_NB32_index_wait_DCT(pDCTstat->dev_dct, 1, 0x98, 8); ++ dword &= ~(1 << DisAutoComp); ++ Set_NB32_index_wait_DCT(pDCTstat->dev_dct, 1, 0x98, 8, dword); ++ mct_Wait(15000); /* Wait for 750us */ ++ } + } + } + +@@ -267,29 +334,46 @@ static void ExitSelfRefresh(struct MCTStatStruc *pMCTstat, + + /* Program F2x[1, 0]90[ExitSelfRef]=1 for both DCTs. */ + if (DCT0Present) { +- val = Get_NB32(pDCTstat->dev_dct, 0x90); ++ val = Get_NB32_DCT(pDCTstat->dev_dct, 0, 0x90); + val |= 1 << ExitSelfRef; +- Set_NB32(pDCTstat->dev_dct, 0x90, val); ++ Set_NB32_DCT(pDCTstat->dev_dct, 0, 0x90, val); + } + if (DCT1Present) { +- val = Get_NB32(pDCTstat->dev_dct, 0x90 + 0x100); ++ val = Get_NB32_DCT(pDCTstat->dev_dct, 1, 0x90); + val |= 1 << ExitSelfRef; +- Set_NB32(pDCTstat->dev_dct, 0x90 + 0x100, val); ++ Set_NB32_DCT(pDCTstat->dev_dct, 1, 0x90, val); + } + /* Wait until the hardware resets F2x[1, 0]90[ExitSelfRef]=0. */ + if (DCT0Present) + do { +- val = Get_NB32(pDCTstat->dev_dct, 0x90); ++ val = Get_NB32_DCT(pDCTstat->dev_dct, 0, 0x90); + } while (val & (1 << ExitSelfRef)); + if (DCT1Present) + do { +- val = Get_NB32(pDCTstat->dev_dct, 0x90 + 0x100); ++ val = Get_NB32_DCT(pDCTstat->dev_dct, 1, 0x90); + } while (val & (1 << ExitSelfRef)); + } + + void SetTargetFreq(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat) + { ++ uint32_t dword; ++ uint8_t package_type = mctGet_NVbits(NV_PACK_TYPE); ++ ++ if (is_fam15h()) { ++ /* Program F2x[1, 0]90[DisDllShutDownSR]=1. */ ++ if (pDCTstat->DIMMValidDCT[0]) { ++ dword = Get_NB32_DCT(pDCTstat->dev_dct, 0, 0x90); ++ dword |= (0x1 << 27); ++ Set_NB32_DCT(pDCTstat->dev_dct, 0, 0x90, dword); ++ } ++ if (pDCTstat->DIMMValidDCT[1]) { ++ dword = Get_NB32_DCT(pDCTstat->dev_dct, 1, 0x90); ++ dword |= (0x1 << 27); ++ Set_NB32_DCT(pDCTstat->dev_dct, 1, 0x90, dword); ++ } ++ } ++ + /* Program F2x[1,0]90[EnterSelfRefresh]=1. + * Wait until the hardware resets F2x[1,0]90[EnterSelfRefresh]=0. + */ +@@ -305,11 +389,38 @@ void SetTargetFreq(struct MCTStatStruc *pMCTstat, + */ + ChangeMemClk(pMCTstat, pDCTstat); + ++ if (is_fam15h()) { ++ uint8_t dct; ++ for (dct = 0; dct < 2; dct++) { ++ if (pDCTstat->DIMMValidDCT[dct]) { ++ phyAssistedMemFnceTraining(pMCTstat, pDCTstat); ++ InitPhyCompensation(pMCTstat, pDCTstat, dct); ++ } ++ } ++ } ++ + /* Program F2x[1,0]90[ExitSelfRef]=1 for both DCTs. + * Wait until the hardware resets F2x[1, 0]90[ExitSelfRef]=0. + */ + ExitSelfRefresh(pMCTstat, pDCTstat); + ++ if (is_fam15h()) { ++ if ((package_type == PT_C3) || (package_type == PT_GR)) { ++ /* Socket C32 or G34 */ ++ /* Program F2x[1, 0]90[DisDllShutDownSR]=0. 
*/ ++ if (pDCTstat->DIMMValidDCT[0]) { ++ dword = Get_NB32_DCT(pDCTstat->dev_dct, 0, 0x90); ++ dword &= ~(0x1 << 27); ++ Set_NB32_DCT(pDCTstat->dev_dct, 0, 0x90, dword); ++ } ++ if (pDCTstat->DIMMValidDCT[1]) { ++ dword = Get_NB32_DCT(pDCTstat->dev_dct, 1, 0x90); ++ dword &= ~(0x1 << 27); ++ Set_NB32_DCT(pDCTstat->dev_dct, 1, 0x90, dword); ++ } ++ } ++ } ++ + /* wait for 500 MCLKs after ExitSelfRef, 500*2.5ns=1250ns */ + mct_Wait(250); + +@@ -336,13 +447,13 @@ void SetTargetFreq(struct MCTStatStruc *pMCTstat, + static void Modify_OnDimmMirror(struct DCTStatStruc *pDCTstat, u8 dct, u8 set) + { + u32 val; +- u32 reg_off = dct * 0x100 + 0x44; +- while (reg_off < (dct * 0x100 + 0x60)) { +- val = Get_NB32(pDCTstat->dev_dct, reg_off); ++ u32 reg = 0x44; ++ while (reg < 0x60) { ++ val = Get_NB32_DCT(pDCTstat->dev_dct, dct, reg); + if (val & (1 << CSEnable)) + set ? (val |= 1 << onDimmMirror) : (val &= ~(1<<onDimmMirror)); +- Set_NB32(pDCTstat->dev_dct, reg_off, val); +- reg_off += 8; ++ Set_NB32_DCT(pDCTstat->dev_dct, dct, reg, val); ++ reg += 8; + } + } + +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c +index 9f42d54..7ea7901 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c +@@ -30,13 +30,22 @@ + * + *---------------------------------------------------------------------------- + */ +-u32 swapAddrBits_wl(sDCTStruct *pDCTData, u32 MRSValue); +-u32 swapBankBits(sDCTStruct *pDCTData, u32 MRSValue); +-void prepareDimms(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm, BOOL wl); +-void programODT(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm); +-void procConifg(sMCTStruct *pMCTData,sDCTStruct *pDCTData, u8 dimm, u8 pass); +-void setWLByteDelay(sDCTStruct *pDCTData, u8 ByteLane, u8 dimm, u8 targetAddr); +-void getWLByteDelay(sDCTStruct *pDCTData, u8 ByteLane, u8 dimm); ++u32 swapAddrBits_wl(struct DCTStatStruc *pDCTstat, uint8_t dct, uint32_t MRSValue); ++u32 swapBankBits(struct DCTStatStruc *pDCTstat, uint8_t dct, uint32_t MRSValue); ++void prepareDimms(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, ++ u8 dct, u8 dimm, BOOL wl); ++void programODT(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, uint8_t dct, u8 dimm); ++void procConfig(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, uint8_t dct, u8 dimm, u8 pass); ++void setWLByteDelay(struct DCTStatStruc *pDCTstat, uint8_t dct, u8 ByteLane, u8 dimm, u8 targetAddr, uint8_t pass); ++void getWLByteDelay(struct DCTStatStruc *pDCTstat, uint8_t dct, u8 ByteLane, u8 dimm, uint8_t pass); ++ ++static int32_t abs(int32_t val) { ++ if (val < 0) ++ val *= -1; ++ ++ return val; ++} ++ + /* + *----------------------------------------------------------------------------- + * EXPORTED FUNCTIONS +@@ -62,34 +71,55 @@ void getWLByteDelay(sDCTStruct *pDCTData, u8 ByteLane, u8 dimm); + * OUT + *----------------------------------------------------------------------------- + */ +-void AgesaHwWlPhase1(sMCTStruct *pMCTData, sDCTStruct *pDCTData, +- u8 dimm, u8 pass) ++void AgesaHwWlPhase1(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, ++ u8 dct, u8 dimm, u8 pass) + { + u8 ByteLane; + u32 Value, Addr; + u16 Addl_Data_Offset, Addl_Data_Port; ++ sMCTStruct *pMCTData = pDCTstat->C_MCTPtr; ++ sDCTStruct *pDCTData = pDCTstat->C_DCTPtr[dct]; + + pDCTData->WLPass = pass; + /* 1. Specify the target DIMM that is to be trained by programming + * F2x[1, 0]9C_x08[TrDimmSel]. 
+ */ +- set_DCT_ADDR_Bits(pDCTData, pDCTData->DctTrain, pDCTData->NodeId, FUN_DCT, ++ set_DCT_ADDR_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT, + DRAM_ADD_DCT_PHY_CONTROL_REG, TrDimmSelStart, +- TrDimmSelEnd,(u32)dimm); ++ TrDimmSelEnd, (u32)dimm); ++ ++ if (is_fam15h()) { ++ /* Set TrNibbleSel = 0 ++ * ++ * TODO: Add support for x4 DIMMs ++ */ ++ set_DCT_ADDR_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT, ++ DRAM_ADD_DCT_PHY_CONTROL_REG, 2, ++ 2, (u32)0); ++ } ++ + /* 2. Prepare the DIMMs for write levelization using DDR3-defined + * MR commands. */ +- prepareDimms(pMCTData, pDCTData,dimm, TRUE); ++ prepareDimms(pMCTstat, pDCTstat, dct, dimm, TRUE); ++ + /* 3. After the DIMMs are configured, BIOS waits 40 MEMCLKs to + * satisfy DDR3-defined internal DRAM timing. + */ +- pMCTData->AgesaDelay(40); ++ if (is_fam15h()) ++ precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 40); ++ else ++ pMCTData->AgesaDelay(40); ++ + /* 4. Configure the processor's DDR phy for write levelization training: */ +- procConifg(pMCTData,pDCTData, dimm, pass); ++ procConfig(pMCTstat, pDCTstat, dct, dimm, pass); ++ + /* 5. Begin write levelization training: +- * Program F2x[1, 0]9C_x08[WrtLevelTrEn]=1. */ +- if (pDCTData->LogicalCPUID & (AMD_DR_Cx | AMD_DR_Dx)) +- set_DCT_ADDR_Bits(pDCTData, pDCTData->DctTrain, pDCTData->NodeId, FUN_DCT, ++ * Program F2x[1, 0]9C_x08[WrtLvTrEn]=1. */ ++ if (pDCTData->LogicalCPUID & (AMD_DR_Cx | AMD_DR_Dx | AMD_FAM15_ALL)) ++ { ++ set_DCT_ADDR_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT, + DRAM_ADD_DCT_PHY_CONTROL_REG, WrtLvTrEn, WrtLvTrEn, 1); ++ } + else + { + /* Broadcast write to all D3Dbyte chipset register offset 0xc +@@ -98,7 +128,7 @@ void AgesaHwWlPhase1(sMCTStruct *pMCTData, sDCTStruct *pDCTData, + * retain value of 3:2 (Trdimmsel) + * reset bit 5 (FrzPR) + */ +- if (pDCTData->DctTrain) ++ if (dct) + { + Addl_Data_Offset=0x198; + Addl_Data_Port=0x19C; +@@ -123,29 +153,127 @@ void AgesaHwWlPhase1(sMCTStruct *pMCTData, sDCTStruct *pDCTData, + DctAccessDone, DctAccessDone)) == 0); + } + ++ if (is_fam15h()) ++ proc_MFENCE(); ++ + /* Wait 200 MEMCLKs. If executing pass 2, wait 32 MEMCLKs. */ +- pMCTData->AgesaDelay(140); ++ if (is_fam15h()) ++ precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 200); ++ else ++ pMCTData->AgesaDelay(140); ++ + /* Program F2x[1, 0]9C_x08[WrtLevelTrEn]=0. */ +- set_DCT_ADDR_Bits(pDCTData, pDCTData->DctTrain, pDCTData->NodeId, FUN_DCT, ++ set_DCT_ADDR_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT, + DRAM_ADD_DCT_PHY_CONTROL_REG, WrtLvTrEn, WrtLvTrEn, 0); ++ + /* Read from registers F2x[1, 0]9C_x[51:50] and F2x[1, 0]9C_x52 + * to get the gross and fine delay settings + * for the target DIMM and save these values. 
*/ +- ByteLane = 0; +- while (ByteLane < MAX_BYTE_LANES) +- { +- getWLByteDelay(pDCTData,ByteLane, dimm); +- setWLByteDelay(pDCTData,ByteLane, dimm, 1); +- ByteLane++; ++ for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) { ++ getWLByteDelay(pDCTstat, dct, ByteLane, dimm, pass); ++ } ++ ++ pDCTData->WLCriticalGrossDelayPrevPass = 0x1f; ++} ++ ++void AgesaHwWlPhase2(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, ++ u8 dct, u8 dimm, u8 pass) ++{ ++ u8 ByteLane; ++ sDCTStruct *pDCTData = pDCTstat->C_DCTPtr[dct]; ++ ++ if (is_fam15h()) { ++ int32_t gross_diff[MAX_BYTE_LANES]; ++ int32_t cgd = pDCTData->WLCriticalGrossDelayPrevPass; ++ uint8_t index = (uint8_t)(MAX_BYTE_LANES * dimm); ++ ++ /* Calculate the Critical Gross Delay */ ++ for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) { ++ /* Calculate the gross delay differential for this lane */ ++ gross_diff[ByteLane] = pDCTData->WLSeedGrossDelay[index+ByteLane] + pDCTData->WLGrossDelay[index+ByteLane]; ++ gross_diff[ByteLane] -= pDCTData->WLSeedPreGrossDelay[index+ByteLane]; ++ ++ /* WrDqDqsEarly values greater than 2 are reserved */ ++ if (gross_diff[ByteLane] < -2) ++ gross_diff[ByteLane] = -2; ++ ++ /* Update the Critical Gross Delay */ ++ if (gross_diff[ByteLane] < cgd) ++ cgd = gross_diff[ByteLane]; ++ } ++ ++ pDCTData->WLCriticalGrossDelayPrevPass = cgd; ++ ++ /* Compensate for occasional noise/instability causing sporadic training failure */ ++ for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) { ++ uint16_t total_delay_seed = ((pDCTData->WLSeedGrossDelay[index+ByteLane] & 0x1f) << 5) | (pDCTData->WLSeedFineDelay[index+ByteLane] & 0x1f); ++ uint16_t total_delay_phy = ((pDCTData->WLGrossDelay[index+ByteLane] & 0x1f) << 5) | (pDCTData->WLFineDelay[index+ByteLane] & 0x1f); ++ if (abs(total_delay_phy - total_delay_seed) > 0x20) { ++ printk(BIOS_DEBUG, "%s: overriding faulty phy value\n", __func__); ++ pDCTData->WLGrossDelay[index+ByteLane] = pDCTData->WLSeedGrossDelay[index+ByteLane]; ++ pDCTData->WLFineDelay[index+ByteLane] = pDCTData->WLSeedFineDelay[index+ByteLane]; ++ } ++ } ++ } ++} ++ ++void AgesaHwWlPhase3(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, ++ u8 dct, u8 dimm, u8 pass) ++{ ++ u8 ByteLane; ++ sMCTStruct *pMCTData = pDCTstat->C_MCTPtr; ++ sDCTStruct *pDCTData = pDCTstat->C_DCTPtr[dct]; ++ ++ if (is_fam15h()) { ++ uint32_t dword; ++ int32_t gross_diff[MAX_BYTE_LANES]; ++ int32_t cgd = pDCTData->WLCriticalGrossDelayPrevPass; ++ uint8_t index = (uint8_t)(MAX_BYTE_LANES * dimm); ++ ++ /* Apply offset(s) if needed */ ++ if (cgd < 0) { ++ dword = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0xa8); ++ dword &= ~(0x3 << 24); /* WrDqDqsEarly = abs(cgd) */ ++ dword |= ((abs(cgd) & 0x3) << 24); ++ Set_NB32_DCT(pDCTstat->dev_dct, dct, 0xa8, dword); ++ ++ for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) { ++ /* Calculate the gross delay differential for this lane */ ++ gross_diff[ByteLane] = pDCTData->WLSeedGrossDelay[index+ByteLane] + pDCTData->WLGrossDelay[index+ByteLane]; ++ gross_diff[ByteLane] -= pDCTData->WLSeedPreGrossDelay[index+ByteLane]; ++ ++ /* Prevent underflow in the presence of noise / instability*/ ++ if (gross_diff[ByteLane] < cgd) ++ gross_diff[ByteLane] = cgd; ++ ++ pDCTData->WLGrossDelay[index+ByteLane] = (gross_diff[ByteLane] + (abs(cgd) & 0x3)); ++ } ++ } else { ++ dword = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0xa8); ++ dword &= ~(0x3 << 24); /* WrDqDqsEarly = 0 */ ++ Set_NB32_DCT(pDCTstat->dev_dct, dct, 0xa8, dword); ++ } ++ } ++ ++ /* Write the adjusted gross 
and fine delay settings ++ * to the target DIMM. */ ++ for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) { ++ setWLByteDelay(pDCTstat, dct, ByteLane, dimm, 1, pass); + } + + /* 6. Configure DRAM Phy Control Register so that the phy stops driving + * write levelization ODT. */ +- set_DCT_ADDR_Bits(pDCTData, pDCTData->DctTrain, pDCTData->NodeId, FUN_DCT, ++ set_DCT_ADDR_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT, + DRAM_ADD_DCT_PHY_CONTROL_REG, WrLvOdtEn, WrLvOdtEn, 0); + ++ if (is_fam15h()) ++ proc_MFENCE(); ++ + /* Wait 10 MEMCLKs to allow for ODT signal settling. */ +- pMCTData->AgesaDelay(10); ++ if (is_fam15h()) ++ precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 10); ++ else ++ pMCTData->AgesaDelay(10); + + /* 7. Program the target DIMM back to normal operation by configuring + * the following (See section 2.8.5.4.1.1 +@@ -155,7 +283,7 @@ void AgesaHwWlPhase1(sMCTStruct *pMCTData, sDCTStruct *pDCTData, + * For a two DIMM system, program the Rtt value for the target DIMM + * to the normal operating termination: + */ +- prepareDimms(pMCTData, pDCTData,dimm,FALSE); ++ prepareDimms(pMCTstat, pDCTstat, dct, dimm, FALSE); + } + + /*---------------------------------------------------------------------------- +@@ -165,7 +293,7 @@ void AgesaHwWlPhase1(sMCTStruct *pMCTData, sDCTStruct *pDCTData, + */ + + /*----------------------------------------------------------------------------- +- * u32 swapAddrBits_wl(sDCTStruct *pDCTData, u32 MRSValue) ++ * u32 swapAddrBits_wl(struct DCTStatStruc *pDCTstat, uint8_t dct, u32 MRSValue) + * + * Description: + * This function swaps the bits in MSR register value +@@ -177,12 +305,17 @@ void AgesaHwWlPhase1(sMCTStruct *pMCTData, sDCTStruct *pDCTData, + * + * ---------------------------------------------------------------------------- + */ +-u32 swapAddrBits_wl(sDCTStruct *pDCTData, u32 MRSValue) ++u32 swapAddrBits_wl(struct DCTStatStruc *pDCTstat, uint8_t dct, uint32_t MRSValue) + { ++ sDCTStruct *pDCTData = pDCTstat->C_DCTPtr[dct]; + u32 tempW, tempW1; + +- tempW1 = get_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, +- FUN_DCT, DRAM_INIT, MrsChipSelStart, MrsChipSelEnd); ++ if (is_fam15h()) ++ tempW1 = get_Bits(pDCTData, dct, pDCTData->NodeId, ++ FUN_DCT, DRAM_INIT, MrsChipSelStartFam15, MrsChipSelEndFam15); ++ else ++ tempW1 = get_Bits(pDCTData, dct, pDCTData->NodeId, ++ FUN_DCT, DRAM_INIT, MrsChipSelStartFam10, MrsChipSelEndFam10); + if (tempW1 & 1) + { + if ((pDCTData->Status[DCT_STATUS_OnDimmMirror])) +@@ -201,7 +334,7 @@ u32 swapAddrBits_wl(sDCTStruct *pDCTData, u32 MRSValue) + } + + /*----------------------------------------------------------------------------- +- * u32 swapBankBits(sDCTStruct *pDCTData, u32 MRSValue) ++ * u32 swapBankBits(struct DCTStatStruc *pDCTstat, uint8_t dct, u32 MRSValue) + * + * Description: + * This function swaps the bits in MSR register value +@@ -213,12 +346,17 @@ u32 swapAddrBits_wl(sDCTStruct *pDCTData, u32 MRSValue) + * + * ---------------------------------------------------------------------------- + */ +-u32 swapBankBits(sDCTStruct *pDCTData, u32 MRSValue) ++u32 swapBankBits(struct DCTStatStruc *pDCTstat, uint8_t dct, u32 MRSValue) + { ++ sDCTStruct *pDCTData = pDCTstat->C_DCTPtr[dct]; + u32 tempW, tempW1; + +- tempW1 = get_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, +- FUN_DCT, DRAM_INIT, MrsChipSelStart, MrsChipSelEnd); ++ if (is_fam15h()) ++ tempW1 = get_Bits(pDCTData, dct, pDCTData->NodeId, ++ FUN_DCT, DRAM_INIT, MrsChipSelStartFam15, MrsChipSelEndFam15); ++ else ++ tempW1 = 
get_Bits(pDCTData, dct, pDCTData->NodeId, ++ FUN_DCT, DRAM_INIT, MrsChipSelStartFam10, MrsChipSelEndFam10); + if (tempW1 & 1) + { + if ((pDCTData->Status[DCT_STATUS_OnDimmMirror])) +@@ -269,7 +407,7 @@ static uint16_t unbuffered_dimm_nominal_termination_emrs(uint8_t number_of_dimms + return term; + } + +-static uint16_t unbuffered_dimm_dynamic_termination_emrs(uint8_t number_of_dimms, uint8_t frequency_index, uint8_t rank_count, uint8_t rank) ++static uint16_t unbuffered_dimm_dynamic_termination_emrs(uint8_t number_of_dimms, uint8_t frequency_index, uint8_t rank_count) + { + uint16_t term; + +@@ -300,27 +438,27 @@ static uint16_t unbuffered_dimm_dynamic_termination_emrs(uint8_t number_of_dimms + * + * Description: + * This function prepares DIMMS for training +- * +- * Parameters: +- * IN OUT *DCTData - Pointer to buffer with information about each DCT +- * *SPDData - Pointer to buffer with information about each DIMMs +- * SPD information +- * *MCTData - Pointer to buffer with runtime parameters, +- * IN Dimm - Logical DIMM number +- * WL - indicates if the routine is used for Write levelization +- * training +- * +- * OUT +- * ++ * Fam10h: BKDG Rev. 3.62 section 2.8.9.9.1 ++ * Fam15h: BKDG Rev. 3.14 section 2.10.5.8.1 + * ---------------------------------------------------------------------------- + */ +-void prepareDimms(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm, BOOL wl) ++void prepareDimms(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, ++ u8 dct, u8 dimm, BOOL wl) + { + u32 tempW, tempW1, tempW2, MrsBank; + u8 rank, currDimm, MemClkFreq; ++ sMCTStruct *pMCTData = pDCTstat->C_MCTPtr; ++ sDCTStruct *pDCTData = pDCTstat->C_DCTPtr[dct]; ++ uint8_t package_type = mctGet_NVbits(NV_PACK_TYPE); ++ uint8_t number_of_dimms = pDCTData->MaxDimmsInstalled; + +- MemClkFreq = get_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, ++ if (is_fam15h()) { ++ MemClkFreq = get_Bits(pDCTData, dct, pDCTData->NodeId, ++ FUN_DCT, DRAM_CONFIG_HIGH, 0, 4); ++ } else { ++ MemClkFreq = get_Bits(pDCTData, dct, pDCTData->NodeId, + FUN_DCT, DRAM_CONFIG_HIGH, 0, 2); ++ } + /* Configure the DCT to send initialization MR commands to the target DIMM + * by programming the F2x[1,0]7C register using the following steps. + */ +@@ -328,52 +466,95 @@ void prepareDimms(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm, BOOL wl) + while ((rank < pDCTData->DimmRanks[dimm]) && (rank < 2)) + { + /* Program F2x[1, 0]7C[MrsChipSel[2:0]] for the current rank to be trained. */ +- set_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, FUN_DCT, +- DRAM_INIT, MrsChipSelStart, MrsChipSelEnd, dimm*2+rank); ++ if (is_fam15h()) ++ set_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT, ++ DRAM_INIT, MrsChipSelStartFam15, MrsChipSelEndFam15, dimm*2+rank); ++ else ++ set_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT, ++ DRAM_INIT, MrsChipSelStartFam10, MrsChipSelEndFam10, dimm*2+rank); ++ + /* Program F2x[1, 0]7C[MrsBank[2:0]] for the appropriate internal DRAM + * register that defines the required DDR3-defined function for write + * levelization. 
+ */ +- MrsBank = swapBankBits(pDCTData,1); +- set_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, FUN_DCT, +- DRAM_INIT, MrsBankStart, MrsBankEnd, MrsBank); ++ MrsBank = swapBankBits(pDCTstat, dct, 1); ++ if (is_fam15h()) ++ set_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT, ++ DRAM_INIT, MrsBankStartFam15, MrsBankEndFam15, MrsBank); ++ else ++ set_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT, ++ DRAM_INIT, MrsBankStartFam10, MrsBankEndFam10, MrsBank); ++ + /* Program F2x[1, 0]7C[MrsAddress[15:0]] to the required DDR3-defined function + * for write levelization. + */ + tempW = 0;/* DLL_DIS = 0, DIC = 0, AL = 0, TDQS = 0 */ + +- /* Set TDQS=1b for x8 DIMM, TDQS=0b for x4 DIMM, when mixed x8 & x4 */ +- tempW2 = get_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, +- FUN_DCT, DRAM_CONFIG_HIGH, RDqsEn, RDqsEn); +- if (tempW2) +- { +- if (pDCTData->DimmX8Present[dimm]) +- tempW |= 0x800; ++ /* Retrieve normal settings of the MRS control word and clear Rtt_Nom */ ++ if (is_fam15h()) { ++ tempW = mct_MR1(pMCTstat, pDCTstat, dct, dimm*2+rank) & 0xffff; ++ tempW &= ~(0x0244); ++ } else { ++ /* Set TDQS=1b for x8 DIMM, TDQS=0b for x4 DIMM, when mixed x8 & x4 */ ++ tempW2 = get_Bits(pDCTData, dct, pDCTData->NodeId, ++ FUN_DCT, DRAM_CONFIG_HIGH, RDqsEn, RDqsEn); ++ if (tempW2) ++ { ++ if (pDCTData->DimmX8Present[dimm]) ++ tempW |= 0x800; ++ } + } + + /* determine Rtt_Nom for WL & Normal mode */ +- if (pDCTData->Status[DCT_STATUS_REGISTERED]) { +- tempW1 = RttNomTargetRegDimm(pMCTData, pDCTData, dimm, wl, MemClkFreq, rank); +- } else { ++ if (is_fam15h()) { + if (wl) { +- if (rank == 0) { +- /* Get Rtt_WR for the current DIMM and rank */ +- uint16_t dynamic_term = unbuffered_dimm_dynamic_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[currDimm], rank); +- +- /* Convert dynamic termination code to corresponding nominal termination code */ +- if (dynamic_term == 0x200) +- tempW1 = 0x04; +- else if (dynamic_term == 0x400) +- tempW1 = 0x40; +- else +- tempW1 = 0x0; ++ if (number_of_dimms > 1) { ++ if (rank == 0) { ++ /* Get Rtt_WR for the current DIMM and rank */ ++ tempW2 = fam15_rttwr(pDCTstat, dct, dimm, rank, package_type); ++ } else { ++ tempW2 = fam15_rttnom(pDCTstat, dct, dimm, rank, package_type); ++ } + } else { +- tempW1 = unbuffered_dimm_nominal_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[currDimm], rank); ++ tempW2 = fam15_rttnom(pDCTstat, dct, dimm, rank, package_type); + } + } else { +- tempW1 = unbuffered_dimm_nominal_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[currDimm], rank); ++ tempW2 = fam15_rttnom(pDCTstat, dct, dimm, rank, package_type); ++ } ++ tempW1 = 0; ++ tempW1 |= ((tempW2 & 0x4) >> 2) << 9; ++ tempW1 |= ((tempW2 & 0x2) >> 1) << 6; ++ tempW1 |= ((tempW2 & 0x1) >> 0) << 2; ++ } else { ++ if (pDCTData->Status[DCT_STATUS_REGISTERED]) { ++ tempW1 = RttNomTargetRegDimm(pMCTData, pDCTData, dimm, wl, MemClkFreq, rank); ++ } else { ++ if (wl) { ++ if (number_of_dimms > 1) { ++ if (rank == 0) { ++ /* Get Rtt_WR for the current DIMM and rank */ ++ uint16_t dynamic_term = unbuffered_dimm_dynamic_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[currDimm]); ++ ++ /* Convert dynamic termination code to corresponding nominal termination code */ ++ if (dynamic_term == 0x200) ++ tempW1 = 0x04; ++ else if (dynamic_term == 0x400) ++ tempW1 = 0x40; ++ else ++ tempW1 = 0x0; ++ } else { ++ tempW1 = unbuffered_dimm_nominal_termination_emrs(pDCTData->MaxDimmsInstalled, 
MemClkFreq, pDCTData->DimmRanks[currDimm], rank); ++ } ++ } else { ++ tempW1 = unbuffered_dimm_nominal_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[currDimm], rank); ++ } ++ } else { ++ tempW1 = unbuffered_dimm_nominal_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[currDimm], rank); ++ } + } + } ++ ++ /* Apply Rtt_Nom to the MRS control word */ + tempW=tempW|tempW1; + + /* All ranks of the target DIMM are set to write levelization mode. */ +@@ -393,68 +574,105 @@ void prepareDimms(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm, BOOL wl) + tempW = bitTestSet(tempW1, Qoff); + } + } +- /* Program MrsAddress[5,1]=output driver impedance control (DIC): +- * based on F2x[1,0]84[DrvImpCtrl] +- */ +- tempW1 = get_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, +- FUN_DCT, DRAM_MRS_REGISTER, DrvImpCtrlStart, DrvImpCtrlEnd); ++ ++ /* Program MrsAddress[5,1]=output driver impedance control (DIC) */ ++ if (is_fam15h()) { ++ tempW1 = fam15_dimm_dic(pDCTstat, dct, dimm, rank, package_type); ++ } else { ++ /* Read DIC from F2x[1,0]84[DrvImpCtrl] */ ++ tempW1 = get_Bits(pDCTData, dct, pDCTData->NodeId, ++ FUN_DCT, DRAM_MRS_REGISTER, DrvImpCtrlStart, DrvImpCtrlEnd); ++ } ++ ++ /* Apply DIC to the MRS control word */ + if (bitTest(tempW1, 1)) + tempW = bitTestSet(tempW, 5); + if (bitTest(tempW1, 0)) + tempW = bitTestSet(tempW, 1); + +- tempW = swapAddrBits_wl(pDCTData, tempW); ++ tempW = swapAddrBits_wl(pDCTstat, dct, tempW); ++ ++ if (is_fam15h()) ++ set_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT, ++ DRAM_INIT, MrsAddressStartFam15, MrsAddressEndFam15, tempW); ++ else ++ set_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT, ++ DRAM_INIT, MrsAddressStartFam10, MrsAddressEndFam10, tempW); + +- set_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, FUN_DCT, +- DRAM_INIT, MrsAddressStart, MrsAddressEnd, tempW); + /* Program F2x[1, 0]7C[SendMrsCmd]=1 to initiate the command to + * the specified DIMM. + */ +- set_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, FUN_DCT, ++ set_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT, + DRAM_INIT, SendMrsCmd, SendMrsCmd, 1); + /* Wait for F2x[1, 0]7C[SendMrsCmd] to be cleared by hardware. */ +- while ((get_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, ++ while ((get_Bits(pDCTData, dct, pDCTData->NodeId, + FUN_DCT, DRAM_INIT, SendMrsCmd, SendMrsCmd)) == 0x1) + { + } ++ + /* Program F2x[1, 0]7C[MrsBank[2:0]] for the appropriate internal DRAM + * register that defines the required DDR3-defined function for Rtt_WR. + */ +- MrsBank = swapBankBits(pDCTData,2); +- set_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, FUN_DCT, +- DRAM_INIT, MrsBankStart, MrsBankEnd, MrsBank); ++ MrsBank = swapBankBits(pDCTstat, dct, 2); ++ if (is_fam15h()) ++ set_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT, ++ DRAM_INIT, MrsBankStartFam15, MrsBankEndFam15, MrsBank); ++ else ++ set_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT, ++ DRAM_INIT, MrsBankStartFam10, MrsBankEndFam10, MrsBank); ++ + /* Program F2x[1, 0]7C[MrsAddress[15:0]] to the required DDR3-defined function + * for Rtt_WR (DRAMTermDyn). 
+ */ + tempW = 0;/* PASR = 0,*/ +- /* program MrsAddress[7,6,5:3]=SRT,ASR,CWL, +- * based on F2x[1,0]84[19,18,22:20]=,SRT,ASR,Tcwl */ +- tempW1 = get_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, +- FUN_DCT, DRAM_MRS_REGISTER, PCI_MIN_LOW, PCI_MAX_HIGH); +- if (bitTest(tempW1,19)) +- {tempW = bitTestSet(tempW, 7);} +- if (bitTest(tempW1,18)) +- {tempW = bitTestSet(tempW, 6);} +- /* tempW=tempW|(((tempW1>>20)&0x7)<<3); */ +- tempW=tempW|((tempW1&0x00700000)>>17); +- /* workaround for DR-B0 */ +- if ((pDCTData->LogicalCPUID & AMD_DR_Bx) && (pDCTData->Status[DCT_STATUS_REGISTERED])) +- tempW+=0x8; ++ ++ /* Retrieve normal settings of the MRS control word and clear Rtt_WR */ ++ if (is_fam15h()) { ++ tempW = mct_MR2(pMCTstat, pDCTstat, dct, dimm*2+rank) & 0xffff; ++ tempW &= ~(0x0600); ++ } else { ++ /* program MrsAddress[7,6,5:3]=SRT,ASR,CWL, ++ * based on F2x[1,0]84[19,18,22:20]=,SRT,ASR,Tcwl */ ++ tempW1 = get_Bits(pDCTData, dct, pDCTData->NodeId, ++ FUN_DCT, DRAM_MRS_REGISTER, PCI_MIN_LOW, PCI_MAX_HIGH); ++ if (bitTest(tempW1,19)) ++ {tempW = bitTestSet(tempW, 7);} ++ if (bitTest(tempW1,18)) ++ {tempW = bitTestSet(tempW, 6);} ++ /* tempW=tempW|(((tempW1>>20)&0x7)<<3); */ ++ tempW=tempW|((tempW1&0x00700000)>>17); ++ /* workaround for DR-B0 */ ++ if ((pDCTData->LogicalCPUID & AMD_DR_Bx) && (pDCTData->Status[DCT_STATUS_REGISTERED])) ++ tempW+=0x8; ++ } ++ + /* determine Rtt_WR for WL & Normal mode */ +- if (pDCTData->Status[DCT_STATUS_REGISTERED]) +- tempW1 = RttWrRegDimm(pMCTData, pDCTData, dimm, wl, MemClkFreq, rank); +- else +- tempW1 = unbuffered_dimm_dynamic_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[dimm], rank); ++ if (is_fam15h()) { ++ tempW1 = (fam15_rttwr(pDCTstat, dct, dimm, rank, package_type) << 9); ++ } else { ++ if (pDCTData->Status[DCT_STATUS_REGISTERED]) ++ tempW1 = RttWrRegDimm(pMCTData, pDCTData, dimm, wl, MemClkFreq, rank); ++ else ++ tempW1 = unbuffered_dimm_dynamic_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[dimm]); ++ } ++ ++ /* Apply Rtt_WR to the MRS control word */ + tempW=tempW|tempW1; +- tempW = swapAddrBits_wl(pDCTData,tempW); +- set_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, FUN_DCT, +- DRAM_INIT, MrsAddressStart, MrsAddressEnd, tempW); ++ tempW = swapAddrBits_wl(pDCTstat, dct, tempW); ++ if (is_fam15h()) ++ set_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT, ++ DRAM_INIT, MrsAddressStartFam15, MrsAddressEndFam15, tempW); ++ else ++ set_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT, ++ DRAM_INIT, MrsAddressStartFam10, MrsAddressEndFam10, tempW); ++ + /* Program F2x[1, 0]7C[SendMrsCmd]=1 to initiate the command to + the specified DIMM.*/ +- set_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, FUN_DCT, ++ set_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT, + DRAM_INIT, SendMrsCmd, SendMrsCmd, 1); ++ + /* Wait for F2x[1, 0]7C[SendMrsCmd] to be cleared by hardware. */ +- while ((get_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, ++ while ((get_Bits(pDCTData, dct, pDCTData->NodeId, + FUN_DCT, DRAM_INIT, SendMrsCmd, SendMrsCmd)) == 0x1) + { + } +@@ -473,97 +691,163 @@ void prepareDimms(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm, BOOL wl) + rank = 0; + while ((rank < pDCTData->DimmRanks[currDimm]) && (rank < 2)) + { +- + /* Program F2x[1, 0]7C[MrsChipSel[2:0]] for the current rank + * to be trained. 
+ */ +- set_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, +- FUN_DCT, DRAM_INIT, MrsChipSelStart, MrsChipSelEnd, currDimm*2+rank); ++ if (is_fam15h()) ++ set_Bits(pDCTData, dct, pDCTData->NodeId, ++ FUN_DCT, DRAM_INIT, MrsChipSelStartFam15, MrsChipSelEndFam15, currDimm*2+rank); ++ else ++ set_Bits(pDCTData, dct, pDCTData->NodeId, ++ FUN_DCT, DRAM_INIT, MrsChipSelStartFam10, MrsChipSelEndFam10, currDimm*2+rank); ++ + /* Program F2x[1, 0]7C[MrsBank[2:0]] for the appropriate internal + * DRAM register that defines the required DDR3-defined function + * for write levelization. + */ +- MrsBank = swapBankBits(pDCTData,1); +- set_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, +- FUN_DCT, DRAM_INIT, MrsBankStart, MrsBankEnd, MrsBank); ++ MrsBank = swapBankBits(pDCTstat, dct, 1); ++ if (is_fam15h()) ++ set_Bits(pDCTData, dct, pDCTData->NodeId, ++ FUN_DCT, DRAM_INIT, MrsBankStartFam15, MrsBankEndFam15, MrsBank); ++ else ++ set_Bits(pDCTData, dct, pDCTData->NodeId, ++ FUN_DCT, DRAM_INIT, MrsBankStartFam10, MrsBankEndFam10, MrsBank); ++ + /* Program F2x[1, 0]7C[MrsAddress[15:0]] to the required + * DDR3-defined function for write levelization. + */ + tempW = 0;/* DLL_DIS = 0, DIC = 0, AL = 0, TDQS = 0, Level=0, Qoff=0 */ + +- /* Set TDQS=1b for x8 DIMM, TDQS=0b for x4 DIMM, when mixed x8 & x4 */ +- tempW2 = get_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, +- FUN_DCT, DRAM_CONFIG_HIGH, RDqsEn, RDqsEn); +- if (tempW2) +- { +- if (pDCTData->DimmX8Present[currDimm]) +- tempW |= 0x800; ++ /* Retrieve normal settings of the MRS control word and clear Rtt_Nom */ ++ if (is_fam15h()) { ++ tempW = mct_MR1(pMCTstat, pDCTstat, dct, dimm*2+rank) & 0xffff; ++ tempW &= ~(0x0244); ++ } else { ++ /* Set TDQS=1b for x8 DIMM, TDQS=0b for x4 DIMM, when mixed x8 & x4 */ ++ tempW2 = get_Bits(pDCTData, dct, pDCTData->NodeId, ++ FUN_DCT, DRAM_CONFIG_HIGH, RDqsEn, RDqsEn); ++ if (tempW2) ++ { ++ if (pDCTData->DimmX8Present[currDimm]) ++ tempW |= 0x800; ++ } + } + + /* determine Rtt_Nom for WL & Normal mode */ +- if (pDCTData->Status[DCT_STATUS_REGISTERED]) +- tempW1 = RttNomNonTargetRegDimm(pMCTData, pDCTData, currDimm, wl, MemClkFreq, rank); +- else +- tempW1 = unbuffered_dimm_nominal_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[currDimm], rank); ++ if (is_fam15h()) { ++ tempW2 = fam15_rttnom(pDCTstat, dct, dimm, rank, package_type); ++ tempW1 = 0; ++ tempW1 |= ((tempW2 & 0x4) >> 2) << 9; ++ tempW1 |= ((tempW2 & 0x2) >> 1) << 6; ++ tempW1 |= ((tempW2 & 0x1) >> 0) << 2; ++ } else { ++ if (pDCTData->Status[DCT_STATUS_REGISTERED]) ++ tempW1 = RttNomNonTargetRegDimm(pMCTData, pDCTData, currDimm, wl, MemClkFreq, rank); ++ else ++ tempW1 = unbuffered_dimm_nominal_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[currDimm], rank); ++ } ++ ++ /* Apply Rtt_Nom to the MRS control word */ + tempW=tempW|tempW1; +- /* program MrsAddress[5,1]=output driver impedance control (DIC): +- * based on F2x[1,0]84[DrvImpCtrl] */ +- tempW1 = get_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, +- FUN_DCT, DRAM_MRS_REGISTER, DrvImpCtrlStart, DrvImpCtrlEnd); ++ ++ /* Program MrsAddress[5,1]=output driver impedance control (DIC) */ ++ if (is_fam15h()) { ++ tempW1 = fam15_dimm_dic(pDCTstat, dct, dimm, rank, package_type); ++ } else { ++ /* Read DIC from F2x[1,0]84[DrvImpCtrl] */ ++ tempW1 = get_Bits(pDCTData, dct, pDCTData->NodeId, ++ FUN_DCT, DRAM_MRS_REGISTER, DrvImpCtrlStart, DrvImpCtrlEnd); ++ } ++ ++ /* Apply DIC to the MRS control word */ + if (bitTest(tempW1,1)) + 
{tempW = bitTestSet(tempW, 5);} + if (bitTest(tempW1,0)) + {tempW = bitTestSet(tempW, 1);} +- tempW = swapAddrBits_wl(pDCTData,tempW); +- set_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, +- FUN_DCT, DRAM_INIT, MrsAddressStart, MrsAddressEnd, tempW); ++ ++ tempW = swapAddrBits_wl(pDCTstat, dct, tempW); ++ ++ if (is_fam15h()) ++ set_Bits(pDCTData, dct, pDCTData->NodeId, ++ FUN_DCT, DRAM_INIT, MrsAddressStartFam15, MrsAddressEndFam15, tempW); ++ else ++ set_Bits(pDCTData, dct, pDCTData->NodeId, ++ FUN_DCT, DRAM_INIT, MrsAddressStartFam10, MrsAddressEndFam10, tempW); ++ + /* Program F2x[1, 0]7C[SendMrsCmd]=1 to initiate the command + * to the specified DIMM. + */ +- set_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, ++ set_Bits(pDCTData, dct, pDCTData->NodeId, + FUN_DCT, DRAM_INIT, SendMrsCmd, SendMrsCmd, 1); ++ + /* Wait for F2x[1, 0]7C[SendMrsCmd] to be cleared by hardware. */ +- while ((get_Bits(pDCTData, pDCTData->CurrDct, ++ while ((get_Bits(pDCTData, dct, + pDCTData->NodeId, FUN_DCT, DRAM_INIT, + SendMrsCmd, SendMrsCmd)) == 1); ++ + /* Program F2x[1, 0]7C[MrsBank[2:0]] for the appropriate internal DRAM + * register that defines the required DDR3-defined function for Rtt_WR. + */ +- MrsBank = swapBankBits(pDCTData,2); +- set_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, FUN_DCT, +- DRAM_INIT, MrsBankStart, MrsBankEnd, MrsBank); ++ MrsBank = swapBankBits(pDCTstat, dct, 2); ++ if (is_fam15h()) ++ set_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT, ++ DRAM_INIT, MrsBankStartFam15, MrsBankEndFam15, MrsBank); ++ else ++ set_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT, ++ DRAM_INIT, MrsBankStartFam10, MrsBankEndFam10, MrsBank); ++ + /* Program F2x[1, 0]7C[MrsAddress[15:0]] to the required DDR3-defined function + * for Rtt_WR (DRAMTermDyn). 
+ */ + tempW = 0;/* PASR = 0,*/ +- /* program MrsAddress[7,6,5:3]=SRT,ASR,CWL, +- * based on F2x[1,0]84[19,18,22:20]=,SRT,ASR,Tcwl */ +- tempW1 = get_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, +- FUN_DCT, DRAM_MRS_REGISTER, PCI_MIN_LOW, PCI_MAX_HIGH); +- if (bitTest(tempW1,19)) +- {tempW = bitTestSet(tempW, 7);} +- if (bitTest(tempW1,18)) +- {tempW = bitTestSet(tempW, 6);} +- /* tempW=tempW|(((tempW1>>20)&0x7)<<3); */ +- tempW=tempW|((tempW1&0x00700000)>>17); +- /* workaround for DR-B0 */ +- if ((pDCTData->LogicalCPUID & AMD_DR_Bx) && (pDCTData->Status[DCT_STATUS_REGISTERED])) +- tempW+=0x8; ++ ++ /* Retrieve normal settings of the MRS control word and clear Rtt_WR */ ++ if (is_fam15h()) { ++ tempW = mct_MR2(pMCTstat, pDCTstat, dct, dimm*2+rank) & 0xffff; ++ tempW &= ~(0x0600); ++ } else { ++ /* program MrsAddress[7,6,5:3]=SRT,ASR,CWL, ++ * based on F2x[1,0]84[19,18,22:20]=,SRT,ASR,Tcwl */ ++ tempW1 = get_Bits(pDCTData, dct, pDCTData->NodeId, ++ FUN_DCT, DRAM_MRS_REGISTER, PCI_MIN_LOW, PCI_MAX_HIGH); ++ if (bitTest(tempW1,19)) ++ {tempW = bitTestSet(tempW, 7);} ++ if (bitTest(tempW1,18)) ++ {tempW = bitTestSet(tempW, 6);} ++ /* tempW=tempW|(((tempW1>>20)&0x7)<<3); */ ++ tempW=tempW|((tempW1&0x00700000)>>17); ++ /* workaround for DR-B0 */ ++ if ((pDCTData->LogicalCPUID & AMD_DR_Bx) && (pDCTData->Status[DCT_STATUS_REGISTERED])) ++ tempW+=0x8; ++ } ++ + /* determine Rtt_WR for WL & Normal mode */ +- if (pDCTData->Status[DCT_STATUS_REGISTERED]) +- tempW1 = RttWrRegDimm(pMCTData, pDCTData, currDimm, wl, MemClkFreq, rank); +- else +- tempW1 = unbuffered_dimm_dynamic_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[currDimm], rank); ++ if (is_fam15h()) { ++ tempW1 = (fam15_rttwr(pDCTstat, dct, dimm, rank, package_type) << 9); ++ } else { ++ if (pDCTData->Status[DCT_STATUS_REGISTERED]) ++ tempW1 = RttWrRegDimm(pMCTData, pDCTData, currDimm, wl, MemClkFreq, rank); ++ else ++ tempW1 = unbuffered_dimm_dynamic_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[currDimm]); ++ } ++ ++ /* Apply Rtt_WR to the MRS control word */ + tempW=tempW|tempW1; +- tempW = swapAddrBits_wl(pDCTData,tempW); +- set_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, FUN_DCT, +- DRAM_INIT, MrsAddressStart, MrsAddressEnd, tempW); ++ tempW = swapAddrBits_wl(pDCTstat, dct, tempW); ++ if (is_fam15h()) ++ set_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT, ++ DRAM_INIT, MrsAddressStartFam15, MrsAddressEndFam15, tempW); ++ else ++ set_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT, ++ DRAM_INIT, MrsAddressStartFam10, MrsAddressEndFam10, tempW); ++ + /* Program F2x[1, 0]7C[SendMrsCmd]=1 to initiate the command to + the specified DIMM.*/ +- set_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, FUN_DCT, ++ set_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT, + DRAM_INIT, SendMrsCmd, SendMrsCmd, 1); ++ + /* Wait for F2x[1, 0]7C[SendMrsCmd] to be cleared by hardware. 
*/ +- while ((get_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, ++ while ((get_Bits(pDCTData, dct, pDCTData->NodeId, + FUN_DCT, DRAM_INIT, SendMrsCmd, SendMrsCmd)) == 0x1) + { + } +@@ -587,29 +871,60 @@ void prepareDimms(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm, BOOL wl) + * OUT + * ---------------------------------------------------------------------------- + */ +-void programODT(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm) ++void programODT(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, uint8_t dct, u8 dimm) + { ++ sMCTStruct *pMCTData = pDCTstat->C_MCTPtr; ++ sDCTStruct *pDCTData = pDCTstat->C_DCTPtr[dct]; ++ + u8 WrLvOdt1=0; + +- if (pDCTData->Status[DCT_STATUS_REGISTERED] == 0) { +- if ((pDCTData->DctCSPresent & 0x05) == 0x05) { +- WrLvOdt1 = 0x03; +- } else if (bitTest((u32)pDCTData->DctCSPresent,(u8)(dimm*2+1))) { +- WrLvOdt1 = (u8)bitTestSet(WrLvOdt1, dimm+2); ++ if (is_fam15h()) { ++ /* Convert DIMM number to CS */ ++ uint32_t dword; ++ uint8_t cs; ++ uint8_t rank = 0; ++ ++ cs = (dimm * 2) + rank; ++ ++ /* Fetch preprogammed ODT pattern from configuration registers */ ++ dword = Get_NB32_DCT(pDCTstat->dev_dct, dct, ((cs>3)?0x23c:0x238)); ++ if ((cs == 7) || (cs == 3)) ++ WrLvOdt1 = ((dword >> 24) & 0xf); ++ else if ((cs == 6) || (cs == 2)) ++ WrLvOdt1 = ((dword >> 16) & 0xf); ++ else if ((cs == 5) || (cs == 1)) ++ WrLvOdt1 = ((dword >> 8) & 0xf); ++ else if ((cs == 4) || (cs == 0)) ++ WrLvOdt1 = (dword & 0xf); ++ } else { ++ if (pDCTData->Status[DCT_STATUS_REGISTERED] == 0) { ++ if ((pDCTData->DctCSPresent & 0x05) == 0x05) { ++ WrLvOdt1 = 0x03; ++ } else if (bitTest((u32)pDCTData->DctCSPresent,(u8)(dimm*2+1))) { ++ WrLvOdt1 = (u8)bitTestSet(WrLvOdt1, dimm+2); ++ } else { ++ WrLvOdt1 = (u8)bitTestSet(WrLvOdt1, dimm); ++ } + } else { +- WrLvOdt1 = (u8)bitTestSet(WrLvOdt1, dimm); ++ WrLvOdt1 = WrLvOdtRegDimm(pMCTData, pDCTData, dimm); + } +- } else { +- WrLvOdt1 = WrLvOdtRegDimm(pMCTData, pDCTData, dimm); + } + +- set_DCT_ADDR_Bits(pDCTData, pDCTData->DctTrain, pDCTData->NodeId, FUN_DCT, ++ set_DCT_ADDR_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT, + DRAM_ADD_DCT_PHY_CONTROL_REG, 8, 11, (u32)WrLvOdt1); + + } + ++#ifdef UNUSED_CODE ++static uint16_t fam15h_next_lowest_memclk_freq(uint16_t memclk_freq) ++{ ++ uint16_t fam15h_next_lowest_freq_tab[] = {0, 0, 0, 0, 0x4, 0, 0x4, 0, 0, 0, 0x6, 0, 0, 0, 0xa, 0, 0, 0, 0xe, 0, 0, 0, 0x12}; ++ return fam15h_next_lowest_freq_tab[memclk_freq]; ++} ++#endif ++ + /*----------------------------------------------------------------------------- +- * void procConifg(MCTStruct *MCTData,DCTStruct *DCTData, u8 Dimm, u8 Pass) ++ * void procConfig(MCTStruct *MCTData,DCTStruct *DCTData, u8 Dimm, u8 Pass) + * + * Description: + * This function programs the ODT values for the NB +@@ -622,31 +937,43 @@ void programODT(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm) + * OUT + * ---------------------------------------------------------------------------- + */ +-void procConifg(sMCTStruct *pMCTData,sDCTStruct *pDCTData, u8 dimm, u8 pass) ++void procConfig(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, uint8_t dct, u8 dimm, u8 pass) + { +- u8 ByteLane, Seed_Gross, Seed_Fine, MemClkFreq; ++ u8 ByteLane, MemClkFreq; ++ int32_t Seed_Gross; ++ int32_t Seed_Fine; ++ uint8_t Seed_PreGross; + u32 Value, Addr; + u16 Addl_Data_Offset, Addl_Data_Port; +- u16 freq_tab[] = {400, 533, 667, 800}; ++ sMCTStruct *pMCTData = pDCTstat->C_MCTPtr; ++ sDCTStruct *pDCTData = pDCTstat->C_DCTPtr[dct]; ++ u16 fam10h_freq_tab[] = 
{400, 533, 667, 800}; ++ uint16_t fam15h_freq_tab[] = {0, 0, 0, 0, 333, 0, 400, 0, 0, 0, 533, 0, 0, 0, 667, 0, 0, 0, 800, 0, 0, 0, 933}; + +- /* MemClkFreq: 3: 400MHz; 4: 533MHz; 5: 667MHz; 6: 800MHz */ +- MemClkFreq = get_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, +- FUN_DCT, DRAM_CONFIG_HIGH, 0, 2); ++ if (is_fam15h()) { ++ /* MemClkFreq: 0x4: 333MHz; 0x6: 400MHz; 0xa: 533MHz; 0xe: 667MHz; 0x12: 800MHz; 0x16: 933MHz */ ++ MemClkFreq = get_Bits(pDCTData, dct, pDCTData->NodeId, ++ FUN_DCT, DRAM_CONFIG_HIGH, 0, 4); ++ } else { ++ /* MemClkFreq: 3: 400MHz; 4: 533MHz; 5: 667MHz; 6: 800MHz */ ++ MemClkFreq = get_Bits(pDCTData, dct, pDCTData->NodeId, ++ FUN_DCT, DRAM_CONFIG_HIGH, 0, 2); ++ } + + /* Program F2x[1, 0]9C_x08[WrLvOdt[3:0]] to the proper ODT settings for the + * current memory subsystem configuration. + */ +- programODT(pMCTData, pDCTData, dimm); ++ programODT(pMCTstat, pDCTstat, dct, dimm); + + /* Program F2x[1,0]9C_x08[WrLvOdtEn]=1 */ +- if (pDCTData->LogicalCPUID & (AMD_DR_Cx | AMD_DR_Dx)) { +- set_DCT_ADDR_Bits(pDCTData, pDCTData->DctTrain, pDCTData->NodeId, FUN_DCT, ++ if (pDCTData->LogicalCPUID & (AMD_DR_Cx | AMD_DR_Dx | AMD_FAM15_ALL)) { ++ set_DCT_ADDR_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT, + DRAM_ADD_DCT_PHY_CONTROL_REG, WrLvOdtEn, WrLvOdtEn, (u32)1); + } + else + { + /* Program WrLvOdtEn=1 through set bit 12 of D3CSODT reg offset 0 for Rev.B */ +- if (pDCTData->DctTrain) ++ if (dct) + { + Addl_Data_Offset=0x198; + Addl_Data_Port=0x19C; +@@ -669,33 +996,94 @@ void procConifg(sMCTStruct *pMCTData,sDCTStruct *pDCTData, u8 dimm, u8 pass) + DctAccessDone, DctAccessDone)) == 0); + } + ++ if (is_fam15h()) ++ proc_MFENCE(); ++ + /* Wait 10 MEMCLKs to allow for ODT signal settling. */ +- pMCTData->AgesaDelay(10); ++ if (is_fam15h()) ++ precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 10); ++ else ++ pMCTData->AgesaDelay(10); ++ ++ /* Program write levelling seed values */ + if (pass == 1) + { +- if (pDCTData->Status[DCT_STATUS_REGISTERED]) +- { +- if(pDCTData->RegMan1Present & ((1<<(dimm*2+pDCTData->DctTrain)))) ++ /* Pass 1 */ ++ if (is_fam15h()) { ++ uint8_t AddrCmdPrelaunch = 0; /* TODO: Fetch the correct value from RC2[0] */ ++ uint8_t package_type = mctGet_NVbits(NV_PACK_TYPE); ++ uint16_t Seed_Total = 0; ++ if (package_type == PT_GR) { ++ /* Socket G34: Fam15h BKDG v3.14 Table 96 */ ++ if (pDCTData->Status[DCT_STATUS_REGISTERED]) { ++ Seed_Total = 0x41; ++ } else if (pDCTData->Status[DCT_STATUS_LOAD_REDUCED]) { ++ Seed_Total = 0x0; ++ } else { ++ Seed_Total = 0xf; ++ } ++ } else if (package_type == PT_C3) { ++ /* Socket C32: Fam15h BKDG v3.14 Table 97 */ ++ if (pDCTData->Status[DCT_STATUS_REGISTERED]) { ++ Seed_Total = 0x3e; ++ } else if (pDCTData->Status[DCT_STATUS_LOAD_REDUCED]) { ++ Seed_Total = 0x0; ++ } else { ++ Seed_Total = 0x12; ++ } ++ } else if (package_type == PT_M2) { ++ /* Socket AM3: Fam15h BKDG v3.14 Table 98 */ ++ Seed_Total = 0xf; ++ } ++ if (pDCTData->Status[DCT_STATUS_REGISTERED]) ++ Seed_Total += ((AddrCmdPrelaunch)?0x10:0x0); ++ ++ /* Adjust seed for the minimum platform supported frequency */ ++ Seed_Total = (int32_t) (((((int64_t) Seed_Total) * ++ fam15h_freq_tab[MemClkFreq] * 100) / (mctGet_NVbits(NV_MIN_MEMCLK) * 100))); ++ ++ Seed_Gross = (Seed_Total >> 5) & 0x1f; ++ Seed_Fine = Seed_Total & 0x1f; ++ ++ /* Save seed values for later use */ ++ for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) { ++ pDCTData->WLSeedGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Gross; ++ 
pDCTData->WLSeedFineDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Fine; ++ ++ if (Seed_Gross == 0) ++ Seed_PreGross = 0; ++ else if (Seed_Gross & 0x1) ++ Seed_PreGross = 1; ++ else ++ Seed_PreGross = 2; ++ ++ pDCTData->WLSeedPreGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_PreGross; ++ } ++ } else { ++ if (pDCTData->Status[DCT_STATUS_REGISTERED]) + { +- Seed_Gross = 0x02; +- Seed_Fine = 0x16; ++ if(pDCTData->RegMan1Present & ((1<<(dimm*2+dct)))) ++ { ++ Seed_Gross = 0x02; ++ Seed_Fine = 0x16; ++ } ++ else ++ { ++ Seed_Gross = 0x02; ++ Seed_Fine = 0x00; ++ } + } + else + { +- Seed_Gross = 0x02; +- Seed_Fine = 0x00; +- } +- } +- else +- { +- if (MemClkFreq == 6) { +- /* DDR-800 */ +- Seed_Gross = 0x00; +- Seed_Fine = 0x1a; +- } else { +- /* Use settings for DDR-400 (interpolated from BKDG) */ +- Seed_Gross = 0x00; +- Seed_Fine = 0x0d; ++ if (MemClkFreq == 6) { ++ /* DDR-800 */ ++ Seed_Gross = 0x00; ++ Seed_Fine = 0x1a; ++ } else { ++ /* Use settings for DDR-400 (interpolated from BKDG) */ ++ Seed_Gross = 0x00; ++ Seed_Fine = 0x0d; ++ } + } + } + for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) +@@ -711,39 +1099,91 @@ void procConifg(sMCTStruct *pMCTData,sDCTStruct *pDCTData, u8 dimm, u8 pass) + pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Gross; + pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Fine; + } +- } else { /* Pass 2 */ ++ } else { ++ /* Pass 2 */ + /* From BKDG, Write Leveling Seed Value. */ +- u32 RegisterDelay, SeedTotal; +- for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) +- { +- if (pDCTData->Status[DCT_STATUS_REGISTERED]) +- RegisterDelay = 0x20; /* TODO: ((RCW2 & BIT0) == 0) ? 0x20 : 0x30; */ +- else +- RegisterDelay = 0; +- SeedTotal = (pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] & 0x1f) | +- (pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] << 5); +- /* SeedTotalPreScaling = (the total delay value in F2x[1, 0]9C_x[4A:30] from pass 1 of write levelization +- training) - RegisterDelay. 
*/ +- SeedTotal = (uint16_t) (RegisterDelay + ((((uint64_t) SeedTotal - RegisterDelay) * +- freq_tab[MemClkFreq-3] * 100) / (freq_tab[0] * 100))); +- Seed_Gross = SeedTotal / 32; +- Seed_Fine = SeedTotal & 0x1f; +- if (Seed_Gross == 0) +- Seed_Gross = 0; +- else if (Seed_Gross & 0x1) +- Seed_Gross = 1; +- else +- Seed_Gross = 2; +- pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Gross; +- pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Fine; ++ if (is_fam15h()) { ++ uint32_t RegisterDelay; ++ int32_t SeedTotal; ++ int32_t SeedTotalPreScaling; ++ uint8_t AddrCmdPrelaunch = 0; /* TODO: Fetch the correct value from RC2[0] */ ++ ++ for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) { ++ if (pDCTData->Status[DCT_STATUS_REGISTERED]) { ++ if (AddrCmdPrelaunch) ++ RegisterDelay = 0x30; ++ else ++ RegisterDelay = 0x20; ++ } else { ++ RegisterDelay = 0; ++ } ++ /* Retrieve WrDqDqsEarly */ ++ AmdMemPCIReadBits(MAKE_SBDFO(0,0,24+(pDCTData->NodeId), FUN_DCT, 0xa8), 25, 24, &Value); ++ ++ /* Calculate adjusted seed values */ ++ SeedTotal = (pDCTData->WLFineDelayPrevPass[MAX_BYTE_LANES*dimm+ByteLane] & 0x1f) | ++ ((pDCTData->WLGrossDelayPrevPass[MAX_BYTE_LANES*dimm+ByteLane] & 0x1f) << 5); ++ SeedTotalPreScaling = (SeedTotal - RegisterDelay - (0x20 * Value)); ++ SeedTotal = (int32_t) (RegisterDelay + ((((int64_t) SeedTotalPreScaling) * ++ fam15h_freq_tab[MemClkFreq] * 100) / (fam15h_freq_tab[pDCTData->WLPrevMemclkFreq] * 100))); ++ ++ if (SeedTotal >= 0) { ++ Seed_Gross = SeedTotal / 32; ++ Seed_Fine = SeedTotal % 32; ++ } else { ++ Seed_Gross = (SeedTotal / 32) - 1; ++ Seed_Fine = (SeedTotal % 32) + 32; ++ } ++ ++ if (Seed_Gross == 0) ++ Seed_PreGross = 0; ++ else if (Seed_Gross & 0x1) ++ Seed_PreGross = 1; ++ else ++ Seed_PreGross = 2; ++ ++ /* Save seed values for later use */ ++ pDCTData->WLSeedGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Gross; ++ pDCTData->WLSeedFineDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Fine; ++ pDCTData->WLSeedPreGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_PreGross; ++ ++ pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_PreGross; ++ pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Fine; ++ } ++ } else { ++ u32 RegisterDelay, SeedTotal; ++ for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) ++ { ++ if (pDCTData->Status[DCT_STATUS_REGISTERED]) ++ RegisterDelay = 0x20; /* TODO: ((RCW2 & BIT0) == 0) ? 0x20 : 0x30; */ ++ else ++ RegisterDelay = 0; ++ SeedTotal = (pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] & 0x1f) | ++ (pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] << 5); ++ /* SeedTotalPreScaling = (the total delay value in F2x[1, 0]9C_x[4A:30] from pass 1 of write levelization ++ training) - RegisterDelay. 
*/ ++ SeedTotal = (uint16_t) (RegisterDelay + ((((uint64_t) SeedTotal - RegisterDelay) * ++ fam10h_freq_tab[MemClkFreq-3] * 100) / (fam10h_freq_tab[0] * 100))); ++ Seed_Gross = SeedTotal / 32; ++ Seed_Fine = SeedTotal & 0x1f; ++ if (Seed_Gross == 0) ++ Seed_Gross = 0; ++ else if (Seed_Gross & 0x1) ++ Seed_Gross = 1; ++ else ++ Seed_Gross = 2; ++ pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Gross; ++ pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Fine; ++ } + } + } + +- setWLByteDelay(pDCTData, ByteLane, dimm, 0); ++ pDCTData->WLPrevMemclkFreq = MemClkFreq; ++ setWLByteDelay(pDCTstat, dct, ByteLane, dimm, 0, pass); + } + + /*----------------------------------------------------------------------------- +- * void setWLByteDelay(DCTStruct *DCTData, u8 ByteLane, u8 Dimm){ ++ * void setWLByteDelay(struct DCTStatStruc *pDCTstat, uint8_t dct, u8 ByteLane, u8 Dimm){ + * + * Description: + * This function writes the write levelization byte delay for the Phase +@@ -763,8 +1203,9 @@ void procConifg(sMCTStruct *pMCTData,sDCTStruct *pDCTData, u8 dimm, u8 pass) + * + *----------------------------------------------------------------------------- + */ +-void setWLByteDelay(sDCTStruct *pDCTData, u8 ByteLane, u8 dimm, u8 targetAddr) ++void setWLByteDelay(struct DCTStatStruc *pDCTstat, uint8_t dct, u8 ByteLane, u8 dimm, u8 targetAddr, uint8_t pass) + { ++ sDCTStruct *pDCTData = pDCTstat->C_DCTPtr[dct]; + u8 fineStartLoc, fineEndLoc, grossStartLoc, grossEndLoc, tempB, index, offsetAddr; + u32 addr, fineDelayValue, grossDelayValue, ValueLow, ValueHigh, EccValue, tempW; + +@@ -777,22 +1218,26 @@ void setWLByteDelay(sDCTStruct *pDCTData, u8 ByteLane, u8 dimm, u8 targetAddr) + EccValue = 0; + while (ByteLane < MAX_BYTE_LANES) + { +- /* This subtract 0xC workaround might be temporary. */ +- if ((pDCTData->WLPass==2) && (pDCTData->RegMan1Present & (1<<(dimm*2+pDCTData->DctTrain)))) +- { +- tempW = (pDCTData->WLGrossDelay[index+ByteLane] << 5) | pDCTData->WLFineDelay[index+ByteLane]; +- tempW -= 0xC; +- pDCTData->WLGrossDelay[index+ByteLane] = (u8)(tempW >> 5); +- pDCTData->WLFineDelay[index+ByteLane] = (u8)(tempW & 0x1F); +- } +- grossDelayValue = pDCTData->WLGrossDelay[index+ByteLane]; +- /* Adjust seed gross delay overflow (greater than 3): +- * - Program seed gross delay as 2 (gross is 4 or 6) or 1 (gross is 5). +- * - Keep original seed gross delay for later reference. +- */ +- if(grossDelayValue >= 3) +- { +- grossDelayValue = (grossDelayValue&1)? 1 : 2; ++ if (is_fam15h()) { ++ grossDelayValue = pDCTData->WLGrossDelay[index+ByteLane]; ++ } else { ++ /* This subtract 0xC workaround might be temporary. */ ++ if ((pDCTData->WLPass==2) && (pDCTData->RegMan1Present & (1<<(dimm*2+dct)))) ++ { ++ tempW = (pDCTData->WLGrossDelay[index+ByteLane] << 5) | pDCTData->WLFineDelay[index+ByteLane]; ++ tempW -= 0xC; ++ pDCTData->WLGrossDelay[index+ByteLane] = (u8)(tempW >> 5); ++ pDCTData->WLFineDelay[index+ByteLane] = (u8)(tempW & 0x1F); ++ } ++ grossDelayValue = pDCTData->WLGrossDelay[index+ByteLane]; ++ /* Adjust seed gross delay overflow (greater than 3): ++ * - Program seed gross delay as 2 (gross is 4 or 6) or 1 (gross is 5). ++ * - Keep original seed gross delay for later reference. ++ */ ++ if(grossDelayValue >= 3) ++ { ++ grossDelayValue = (grossDelayValue&1)? 
1 : 2; ++ } + } + fineDelayValue = pDCTData->WLFineDelay[index+ByteLane]; + if (ByteLane < 4) +@@ -803,15 +1248,16 @@ void setWLByteDelay(sDCTStruct *pDCTData, u8 ByteLane, u8 dimm, u8 targetAddr) + EccValue = ((grossDelayValue << 5) | fineDelayValue); + ByteLane++; + } +- set_DCT_ADDR_Bits(pDCTData, pDCTData->DctTrain, pDCTData->NodeId, FUN_DCT, ++ set_DCT_ADDR_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT, + DRAM_CONT_ADD_PHASE_REC_CTRL_LOW, 0, 31, (u32)ValueLow); +- set_DCT_ADDR_Bits(pDCTData, pDCTData->DctTrain, pDCTData->NodeId, FUN_DCT, ++ set_DCT_ADDR_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT, + DRAM_CONT_ADD_PHASE_REC_CTRL_HIGH, 0, 31, (u32)ValueHigh); +- set_DCT_ADDR_Bits(pDCTData, pDCTData->DctTrain, pDCTData->NodeId, FUN_DCT, ++ set_DCT_ADDR_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT, + DRAM_CONT_ADD_ECC_PHASE_REC_CTRL, 0, 31, (u32)EccValue); + } + else + { ++ /* Fam10h BKDG Rev. 3.62 2.8.9.9.1 (6) */ + index = (u8)(MAX_BYTE_LANES * dimm); + grossDelayValue = pDCTData->WLGrossDelay[index+ByteLane]; + fineDelayValue = pDCTData->WLFineDelay[index+ByteLane]; +@@ -841,16 +1287,24 @@ void setWLByteDelay(sDCTStruct *pDCTData, u8 ByteLane, u8 dimm, u8 targetAddr) + grossStartLoc = (u8)(fineEndLoc + 1); + grossEndLoc = (u8)(grossStartLoc + 1); + +- set_DCT_ADDR_Bits(pDCTData, pDCTData->DctTrain, pDCTData->NodeId, FUN_DCT, ++ set_DCT_ADDR_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT, + (u16)addr, fineStartLoc, fineEndLoc,(u32)fineDelayValue); +- set_DCT_ADDR_Bits(pDCTData, pDCTData->DctTrain, pDCTData->NodeId, FUN_DCT, ++ set_DCT_ADDR_Bits(pDCTData, dct, pDCTData->NodeId, FUN_DCT, + (u16)addr, grossStartLoc, grossEndLoc, (u32)grossDelayValue); ++ ++ pDCTData->WLFineDelayPrevPass[index+ByteLane] = fineDelayValue; ++ pDCTData->WLGrossDelayPrevPass[index+ByteLane] = grossDelayValue; ++ if (pass == FirstPass) { ++ pDCTData->WLFineDelayFirstPass[index+ByteLane] = fineDelayValue; ++ pDCTData->WLGrossDelayFirstPass[index+ByteLane] = grossDelayValue; ++ pDCTData->WLCriticalGrossDelayFirstPass = pDCTData->WLCriticalGrossDelayPrevPass; ++ } + } + + } + + /*----------------------------------------------------------------------------- +- * void getWLByteDelay(DCTStruct *DCTData, u8 ByteLane, u8 Dimm) ++ * void getWLByteDelay(struct DCTStatStruc *pDCTstat, uint8_t dct, u8 ByteLane, u8 Dimm) + * + * Description: + * This function reads the write levelization byte delay from the Phase +@@ -868,8 +1322,9 @@ void setWLByteDelay(sDCTStruct *pDCTData, u8 ByteLane, u8 dimm, u8 targetAddr) + * + *----------------------------------------------------------------------------- + */ +-void getWLByteDelay(sDCTStruct *pDCTData, u8 ByteLane, u8 dimm) ++void getWLByteDelay(struct DCTStatStruc *pDCTstat, uint8_t dct, u8 ByteLane, u8 dimm, uint8_t pass) + { ++ sDCTStruct *pDCTData = pDCTstat->C_DCTPtr[dct]; + u8 fineStartLoc, fineEndLoc, grossStartLoc, grossEndLoc, tempB, tempB1, index; + u32 addr, fine, gross; + tempB = 0; +@@ -890,25 +1345,31 @@ void getWLByteDelay(sDCTStruct *pDCTData, u8 ByteLane, u8 dimm) + grossStartLoc = (u8)(fineEndLoc + 1); + grossEndLoc = (u8)(grossStartLoc + 1); + +- fine = get_ADD_DCT_Bits(pDCTData, pDCTData->DctTrain, pDCTData->NodeId, ++ fine = get_ADD_DCT_Bits(pDCTData, dct, pDCTData->NodeId, + FUN_DCT, (u16)addr, fineStartLoc, fineEndLoc); +- gross = get_ADD_DCT_Bits(pDCTData, pDCTData->DctTrain, pDCTData->NodeId, ++ gross = get_ADD_DCT_Bits(pDCTData, dct, pDCTData->NodeId, + FUN_DCT, (u16)addr, grossStartLoc, grossEndLoc); +- /* Adjust seed gross delay overflow (greater than 
3): +- * - Adjust the trained gross delay to the original seed gross delay. +- */ +- if (pDCTData->WLGrossDelay[index+ByteLane] >= 3) { +- gross += pDCTData->WLGrossDelay[index+ByteLane]; +- if(pDCTData->WLGrossDelay[index+ByteLane] & 1) +- gross -= 1; +- else +- gross -= 2; +- } else if ((pDCTData->WLGrossDelay[index+ByteLane] == 0) && (gross == 3)) { +- /* If seed gross delay is 0 but PRE result gross delay is 3, it is negative. +- * We will then round the negative number to 0. ++ ++ if (!is_fam15h()) { ++ /* Adjust seed gross delay overflow (greater than 3): ++ * - Adjust the trained gross delay to the original seed gross delay. + */ +- gross = 0; +- fine = 0; ++ if(pDCTData->WLGrossDelay[index+ByteLane] >= 3) ++ { ++ gross += pDCTData->WLGrossDelay[index+ByteLane]; ++ if(pDCTData->WLGrossDelay[index+ByteLane] & 1) ++ gross -= 1; ++ else ++ gross -= 2; ++ } ++ else if((pDCTData->WLGrossDelay[index+ByteLane] == 0) && (gross == 3)) ++ { ++ /* If seed gross delay is 0 but PRE result gross delay is 3, it is negative. ++ * We will then round the negative number to 0. ++ */ ++ gross = 0; ++ fine = 0; ++ } + } + pDCTData->WLFineDelay[index+ByteLane] = (u8)fine; + pDCTData->WLGrossDelay[index+ByteLane] = (u8)gross; +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mutilc_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mutilc_d.c +index 0466c77..cf6afaa 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mutilc_d.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mutilc_d.c +@@ -2,6 +2,7 @@ + * This file is part of the coreboot project. + * + * Copyright (C) 2010 Advanced Micro Devices, Inc. ++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -134,24 +135,48 @@ static u32 get_Bits(sDCTStruct *pDCTData, + u16 offset, u8 low, u8 high) + { + u32 temp; ++ uint32_t dword; ++ + /* ASSERT(node < MAX_NODES); */ + if (dct == BOTH_DCTS) + { + /* Registers exist on DCT0 only */ ++ if (is_fam15h()) ++ { ++ /* Select DCT 0 */ ++ AmdMemPCIRead(MAKE_SBDFO(0,0,24+node,1,0x10c), &dword); ++ dword &= ~0x1; ++ AmdMemPCIWrite(MAKE_SBDFO(0,0,24+node,1,0x10c), &dword); ++ } ++ + AmdMemPCIReadBits(MAKE_SBDFO(0,0,24+node,func,offset), high, low, &temp); + } + else + { +- if (dct == 1) ++ if (is_fam15h()) + { +- /* Write to dct 1 */ +- offset += 0x100; ++ /* Select DCT */ ++ AmdMemPCIRead(MAKE_SBDFO(0,0,24+node,1,0x10c), &dword); ++ dword &= ~0x1; ++ dword |= (dct & 0x1); ++ AmdMemPCIWrite(MAKE_SBDFO(0,0,24+node,1,0x10c), &dword); ++ ++ /* Read from the selected DCT */ + AmdMemPCIReadBits(MAKE_SBDFO(0,0,24+node,func,offset), high, low, &temp); + } + else + { +- /* Write to dct 0 */ +- AmdMemPCIReadBits(MAKE_SBDFO(0,0,24+node,func,offset), high, low, &temp); ++ if (dct == 1) ++ { ++ /* Read from dct 1 */ ++ offset += 0x100; ++ AmdMemPCIReadBits(MAKE_SBDFO(0,0,24+node,func,offset), high, low, &temp); ++ } ++ else ++ { ++ /* Read from dct 0 */ ++ AmdMemPCIReadBits(MAKE_SBDFO(0,0,24+node,func,offset), high, low, &temp); ++ } + } + } + return temp; +@@ -184,25 +209,49 @@ static void set_Bits(sDCTStruct *pDCTData, + u16 offset, u8 low, u8 high, u32 value) + { + u32 temp; ++ uint32_t dword; ++ + temp = value; + + if (dct == BOTH_DCTS) + { + /* Registers exist on DCT0 only */ ++ if (is_fam15h()) ++ { ++ /* Select DCT 0 */ ++ AmdMemPCIRead(MAKE_SBDFO(0,0,24+node,1,0x10c), &dword); ++ dword &= ~0x1; ++ AmdMemPCIWrite(MAKE_SBDFO(0,0,24+node,1,0x10c), 
&dword); ++ } ++ + AmdMemPCIWriteBits(MAKE_SBDFO(0,0,24+node,func,offset), high, low, &temp); + } + else + { +- if (dct == 1) ++ if (is_fam15h()) + { +- /* Write to dct 1 */ +- offset += 0x100; ++ /* Select DCT */ ++ AmdMemPCIRead(MAKE_SBDFO(0,0,24+node,1,0x10c), &dword); ++ dword &= ~0x1; ++ dword |= (dct & 0x1); ++ AmdMemPCIWrite(MAKE_SBDFO(0,0,24+node,1,0x10c), &dword); ++ ++ /* Write to the selected DCT */ + AmdMemPCIWriteBits(MAKE_SBDFO(0,0,24+node,func,offset), high, low, &temp); + } + else + { +- /* Write to dct 0 */ +- AmdMemPCIWriteBits(MAKE_SBDFO(0,0,24+node,func,offset), high, low, &temp); ++ if (dct == 1) ++ { ++ /* Write to dct 1 */ ++ offset += 0x100; ++ AmdMemPCIWriteBits(MAKE_SBDFO(0,0,24+node,func,offset), high, low, &temp); ++ } ++ else ++ { ++ /* Write to dct 0 */ ++ AmdMemPCIWriteBits(MAKE_SBDFO(0,0,24+node,func,offset), high, low, &temp); ++ } + } + } + } +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mwlc_d.h b/src/northbridge/amd/amdmct/mct_ddr3/mwlc_d.h +index f846d87..162340e 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/mwlc_d.h ++++ b/src/northbridge/amd/amdmct/mct_ddr3/mwlc_d.h +@@ -2,6 +2,7 @@ + * This file is part of the coreboot project. + * + * Copyright (C) 2010 Advanced Micro Devices, Inc. ++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -33,7 +34,8 @@ + #define C_MAX_DIMMS 4 /* Maximum Number of DIMMs on each DCT */ + + /* STATUS Definition */ +-#define DCT_STATUS_REGISTERED 3 /* Registered DIMMs support */ ++#define DCT_STATUS_REGISTERED 3 /* Registered DIMMs support */ ++#define DCT_STATUS_LOAD_REDUCED 4 /* Load-Reduced DIMMs support */ + #define DCT_STATUS_OnDimmMirror 24 /* OnDimmMirror support */ + + /* PCI Defintions */ +@@ -78,12 +80,18 @@ + #define SendMrsCmd 26 + #define Qoff 12 + #define MRS_Level 7 +-#define MrsAddressStart 0 +-#define MrsAddressEnd 15 +-#define MrsBankStart 16 +-#define MrsBankEnd 18 +-#define MrsChipSelStart 20 +-#define MrsChipSelEnd 22 ++#define MrsAddressStartFam10 0 ++#define MrsAddressEndFam10 15 ++#define MrsAddressStartFam15 0 ++#define MrsAddressEndFam15 17 ++#define MrsBankStartFam10 16 ++#define MrsBankEndFam10 18 ++#define MrsBankStartFam15 18 ++#define MrsBankEndFam15 20 ++#define MrsChipSelStartFam10 20 ++#define MrsChipSelEndFam10 22 ++#define MrsChipSelStartFam15 21 ++#define MrsChipSelEndFam15 23 + #define ASR 18 + #define SRT 19 + #define DramTermDynStart 10 +@@ -115,10 +123,32 @@ typedef struct _sDCTStruct + u8 DctTrain; /* Current DCT being trained */ + u8 CurrDct; /* Current DCT number (0 or 1) */ + u8 DctCSPresent; /* Current DCT CS mapping */ ++ int32_t WLSeedGrossDelay[MAX_BYTE_LANES*MAX_LDIMMS]; /* Write Levelization Seed Gross Delay */ ++ /* per byte Lane Per Logical DIMM*/ ++ int32_t WLSeedFineDelay[MAX_BYTE_LANES*MAX_LDIMMS]; /* Write Levelization Seed Fine Delay */ ++ /* per byte Lane Per Logical DIMM*/ ++ int32_t WLSeedPreGrossDelay[MAX_BYTE_LANES*MAX_LDIMMS]; /* Write Levelization Seed Pre-Gross Delay */ ++ /* per byte Lane Per Logical DIMM*/ + u8 WLGrossDelay[MAX_BYTE_LANES*MAX_LDIMMS]; /* Write Levelization Gross Delay */ + /* per byte Lane Per Logical DIMM*/ + u8 WLFineDelay[MAX_BYTE_LANES*MAX_LDIMMS]; /* Write Levelization Fine Delay */ + /* per byte Lane Per Logical DIMM*/ ++ u8 WLGrossDelayFirstPass[MAX_BYTE_LANES*MAX_LDIMMS]; /* First-Pass Write Levelization Gross Delay */ ++ /* per 
byte Lane Per Logical DIMM*/ ++ u8 WLFineDelayFirstPass[MAX_BYTE_LANES*MAX_LDIMMS]; /* First-Pass Write Levelization Fine Delay */ ++ /* per byte Lane Per Logical DIMM*/ ++ u8 WLGrossDelayPrevPass[MAX_BYTE_LANES*MAX_LDIMMS]; /* Previous Pass Write Levelization Gross Delay */ ++ /* per byte Lane Per Logical DIMM*/ ++ u8 WLFineDelayPrevPass[MAX_BYTE_LANES*MAX_LDIMMS]; /* Previous Pass Write Levelization Fine Delay */ ++ /* per byte Lane Per Logical DIMM*/ ++ u8 WLGrossDelayFinalPass[MAX_BYTE_LANES*MAX_LDIMMS]; /* Final-Pass Write Levelization Gross Delay */ ++ /* per byte Lane Per Logical DIMM*/ ++ u8 WLFineDelayFinalPass[MAX_BYTE_LANES*MAX_LDIMMS]; /* Final-Pass Write Levelization Fine Delay */ ++ /* per byte Lane Per Logical DIMM*/ ++ int32_t WLCriticalGrossDelayFirstPass; ++ int32_t WLCriticalGrossDelayPrevPass; ++ int32_t WLCriticalGrossDelayFinalPass; ++ uint16_t WLPrevMemclkFreq; + u16 RegMan1Present; + u8 DimmPresent[MAX_TOTAL_DIMMS];/* Indicates which DIMMs are present */ + /* from Total Number of DIMMs(per Node)*/ +@@ -132,7 +162,7 @@ typedef struct _sDCTStruct + /* per byte lane */ + u8 MaxDimmsInstalled; /* Max Dimms Installed for current DCT */ + u8 DimmRanks[MAX_TOTAL_DIMMS]; /* Total Number of Ranks(per Dimm) */ +- u32 LogicalCPUID; ++ uint64_t LogicalCPUID; + u8 WLPass; + } sDCTStruct; + +diff --git a/src/northbridge/amd/amdmct/mct_ddr3/s3utils.c b/src/northbridge/amd/amdmct/mct_ddr3/s3utils.c +index c9bcac1..aa23951 100644 +--- a/src/northbridge/amd/amdmct/mct_ddr3/s3utils.c ++++ b/src/northbridge/amd/amdmct/mct_ddr3/s3utils.c +@@ -18,6 +18,7 @@ + */ + + #include <string.h> ++#include <arch/cpu.h> + #include <arch/acpi.h> + #include <cpu/x86/msr.h> + #include <device/device.h> +@@ -32,6 +33,23 @@ + + #define S3NV_FILE_NAME "s3nv" + ++#ifdef __RAMSTAGE__ ++static inline uint8_t is_fam15h(void) ++{ ++ uint8_t fam15h = 0; ++ uint32_t family; ++ ++ family = cpuid_eax(0x80000001); ++ family = ((family & 0xf00000) >> 16) | ((family & 0xf00) >> 8); ++ ++ if (family >= 0x6f) ++ /* Family 15h or later */ ++ fam15h = 1; ++ ++ return fam15h; ++} ++#endif ++ + static ssize_t get_s3nv_file_offset(void); + + ssize_t get_s3nv_file_offset(void) +@@ -47,6 +65,28 @@ ssize_t get_s3nv_file_offset(void) + return s3nv_region.region.offset; + } + ++static uint32_t read_config32_dct(device_t dev, uint8_t node, uint8_t dct, uint32_t reg) { ++ if (is_fam15h()) { ++ uint32_t dword; ++#ifdef __PRE_RAM__ ++ device_t dev_fn1 = PCI_DEV(0, 0x18 + node, 1); ++#else ++ device_t dev_fn1 = dev_find_slot(0, PCI_DEVFN(0x18 + node, 1)); ++#endif ++ ++ /* Select DCT */ ++ dword = pci_read_config32(dev_fn1, 0x10c); ++ dword &= ~0x1; ++ dword |= (dct & 0x1); ++ pci_write_config32(dev_fn1, 0x10c, dword); ++ } else { ++ /* Apply offset */ ++ reg += dct * 0x100; ++ } ++ ++ return pci_read_config32(dev, reg); ++} ++ + static uint32_t read_amd_dct_index_register(device_t dev, uint32_t index_ctl_reg, uint32_t index) + { + uint32_t dword; +@@ -61,12 +101,54 @@ static uint32_t read_amd_dct_index_register(device_t dev, uint32_t index_ctl_reg + return dword; + } + ++static uint32_t read_amd_dct_index_register_dct(device_t dev, uint8_t node, uint8_t dct, uint32_t index_ctl_reg, uint32_t index) ++{ ++ if (is_fam15h()) { ++ uint32_t dword; ++#ifdef __PRE_RAM__ ++ device_t dev_fn1 = PCI_DEV(0, 0x18 + node, 1); ++#else ++ device_t dev_fn1 = dev_find_slot(0, PCI_DEVFN(0x18 + node, 1)); ++#endif ++ ++ /* Select DCT */ ++ dword = pci_read_config32(dev_fn1, 0x10c); ++ dword &= ~0x1; ++ dword |= (dct & 0x1); ++ 
pci_write_config32(dev_fn1, 0x10c, dword); ++ } else { ++ /* Apply offset */ ++ index_ctl_reg += dct * 0x100; ++ } ++ ++ return read_amd_dct_index_register(dev, index_ctl_reg, index); ++} ++ + #ifdef __RAMSTAGE__ + static uint64_t rdmsr_uint64_t(unsigned long index) { + msr_t msr = rdmsr(index); + return (((uint64_t)msr.hi) << 32) | ((uint64_t)msr.lo); + } + ++static uint32_t read_config32_dct_nbpstate(device_t dev, uint8_t node, uint8_t dct, uint8_t nb_pstate, uint32_t reg) { ++ uint32_t dword; ++ device_t dev_fn1 = dev_find_slot(0, PCI_DEVFN(0x18 + node, 1)); ++ ++ /* Select DCT */ ++ dword = pci_read_config32(dev_fn1, 0x10c); ++ dword &= ~0x1; ++ dword |= (dct & 0x1); ++ pci_write_config32(dev_fn1, 0x10c, dword); ++ ++ /* Select NB Pstate index */ ++ dword = pci_read_config32(dev_fn1, 0x10c); ++ dword &= ~(0x3 << 4); ++ dword |= (nb_pstate & 0x3) << 4; ++ pci_write_config32(dev_fn1, 0x10c, dword); ++ ++ return pci_read_config32(dev, reg); ++} ++ + void copy_mct_data_to_save_variable(struct amd_s3_persistent_data* persistent_data) + { + uint8_t i; +@@ -82,7 +164,8 @@ void copy_mct_data_to_save_variable(struct amd_s3_persistent_data* persistent_da + device_t dev_fn1 = dev_find_slot(0, PCI_DEVFN(0x18 + node, 1)); + device_t dev_fn2 = dev_find_slot(0, PCI_DEVFN(0x18 + node, 2)); + device_t dev_fn3 = dev_find_slot(0, PCI_DEVFN(0x18 + node, 3)); +- if ((!dev_fn1) || (!dev_fn2) || (!dev_fn3)) { ++ /* Test for node presence */ ++ if ((!dev_fn1) || (pci_read_config32(dev_fn1, PCI_VENDOR_ID) == 0xffffffff)) { + persistent_data->node[node].node_present = 0; + continue; + } +@@ -95,22 +178,22 @@ void copy_mct_data_to_save_variable(struct amd_s3_persistent_data* persistent_da + data->f2x110 = pci_read_config32(dev_fn2, 0x110); + + /* Stage 2 */ +- data->f1x40 = pci_read_config32(dev_fn1, 0x40 + (0x100 * channel)); +- data->f1x44 = pci_read_config32(dev_fn1, 0x44 + (0x100 * channel)); +- data->f1x48 = pci_read_config32(dev_fn1, 0x48 + (0x100 * channel)); +- data->f1x4c = pci_read_config32(dev_fn1, 0x4c + (0x100 * channel)); +- data->f1x50 = pci_read_config32(dev_fn1, 0x50 + (0x100 * channel)); +- data->f1x54 = pci_read_config32(dev_fn1, 0x54 + (0x100 * channel)); +- data->f1x58 = pci_read_config32(dev_fn1, 0x58 + (0x100 * channel)); +- data->f1x5c = pci_read_config32(dev_fn1, 0x5c + (0x100 * channel)); +- data->f1x60 = pci_read_config32(dev_fn1, 0x60 + (0x100 * channel)); +- data->f1x64 = pci_read_config32(dev_fn1, 0x64 + (0x100 * channel)); +- data->f1x68 = pci_read_config32(dev_fn1, 0x68 + (0x100 * channel)); +- data->f1x6c = pci_read_config32(dev_fn1, 0x6c + (0x100 * channel)); +- data->f1x70 = pci_read_config32(dev_fn1, 0x70 + (0x100 * channel)); +- data->f1x74 = pci_read_config32(dev_fn1, 0x74 + (0x100 * channel)); +- data->f1x78 = pci_read_config32(dev_fn1, 0x78 + (0x100 * channel)); +- data->f1x7c = pci_read_config32(dev_fn1, 0x7c + (0x100 * channel)); ++ data->f1x40 = read_config32_dct(dev_fn1, node, channel, 0x40); ++ data->f1x44 = read_config32_dct(dev_fn1, node, channel, 0x44); ++ data->f1x48 = read_config32_dct(dev_fn1, node, channel, 0x48); ++ data->f1x4c = read_config32_dct(dev_fn1, node, channel, 0x4c); ++ data->f1x50 = read_config32_dct(dev_fn1, node, channel, 0x50); ++ data->f1x54 = read_config32_dct(dev_fn1, node, channel, 0x54); ++ data->f1x58 = read_config32_dct(dev_fn1, node, channel, 0x58); ++ data->f1x5c = read_config32_dct(dev_fn1, node, channel, 0x5c); ++ data->f1x60 = read_config32_dct(dev_fn1, node, channel, 0x60); ++ data->f1x64 = read_config32_dct(dev_fn1, node, channel, 
0x64); ++ data->f1x68 = read_config32_dct(dev_fn1, node, channel, 0x68); ++ data->f1x6c = read_config32_dct(dev_fn1, node, channel, 0x6c); ++ data->f1x70 = read_config32_dct(dev_fn1, node, channel, 0x70); ++ data->f1x74 = read_config32_dct(dev_fn1, node, channel, 0x74); ++ data->f1x78 = read_config32_dct(dev_fn1, node, channel, 0x78); ++ data->f1x7c = read_config32_dct(dev_fn1, node, channel, 0x7c); + data->f1xf0 = pci_read_config32(dev_fn1, 0xf0); + data->f1x120 = pci_read_config32(dev_fn1, 0x120); + data->f1x124 = pci_read_config32(dev_fn1, 0x124); +@@ -134,75 +217,144 @@ void copy_mct_data_to_save_variable(struct amd_s3_persistent_data* persistent_da + data->msrc001001f = rdmsr_uint64_t(0xc001001f); + + /* Stage 3 */ +- data->f2x40 = pci_read_config32(dev_fn2, 0x40 + (0x100 * channel)); +- data->f2x44 = pci_read_config32(dev_fn2, 0x44 + (0x100 * channel)); +- data->f2x48 = pci_read_config32(dev_fn2, 0x48 + (0x100 * channel)); +- data->f2x4c = pci_read_config32(dev_fn2, 0x4c + (0x100 * channel)); +- data->f2x50 = pci_read_config32(dev_fn2, 0x50 + (0x100 * channel)); +- data->f2x54 = pci_read_config32(dev_fn2, 0x54 + (0x100 * channel)); +- data->f2x58 = pci_read_config32(dev_fn2, 0x58 + (0x100 * channel)); +- data->f2x5c = pci_read_config32(dev_fn2, 0x5c + (0x100 * channel)); +- data->f2x60 = pci_read_config32(dev_fn2, 0x60 + (0x100 * channel)); +- data->f2x64 = pci_read_config32(dev_fn2, 0x64 + (0x100 * channel)); +- data->f2x68 = pci_read_config32(dev_fn2, 0x68 + (0x100 * channel)); +- data->f2x6c = pci_read_config32(dev_fn2, 0x6c + (0x100 * channel)); +- data->f2x78 = pci_read_config32(dev_fn2, 0x78 + (0x100 * channel)); +- data->f2x7c = pci_read_config32(dev_fn2, 0x7c + (0x100 * channel)); +- data->f2x80 = pci_read_config32(dev_fn2, 0x80 + (0x100 * channel)); +- data->f2x84 = pci_read_config32(dev_fn2, 0x84 + (0x100 * channel)); +- data->f2x88 = pci_read_config32(dev_fn2, 0x88 + (0x100 * channel)); +- data->f2x8c = pci_read_config32(dev_fn2, 0x8c + (0x100 * channel)); +- data->f2x90 = pci_read_config32(dev_fn2, 0x90 + (0x100 * channel)); +- data->f2xa4 = pci_read_config32(dev_fn2, 0xa4 + (0x100 * channel)); +- data->f2xa8 = pci_read_config32(dev_fn2, 0xa8 + (0x100 * channel)); ++ data->f2x40 = read_config32_dct(dev_fn2, node, channel, 0x40); ++ data->f2x44 = read_config32_dct(dev_fn2, node, channel, 0x44); ++ data->f2x48 = read_config32_dct(dev_fn2, node, channel, 0x48); ++ data->f2x4c = read_config32_dct(dev_fn2, node, channel, 0x4c); ++ data->f2x50 = read_config32_dct(dev_fn2, node, channel, 0x50); ++ data->f2x54 = read_config32_dct(dev_fn2, node, channel, 0x54); ++ data->f2x58 = read_config32_dct(dev_fn2, node, channel, 0x58); ++ data->f2x5c = read_config32_dct(dev_fn2, node, channel, 0x5c); ++ data->f2x60 = read_config32_dct(dev_fn2, node, channel, 0x60); ++ data->f2x64 = read_config32_dct(dev_fn2, node, channel, 0x64); ++ data->f2x68 = read_config32_dct(dev_fn2, node, channel, 0x68); ++ data->f2x6c = read_config32_dct(dev_fn2, node, channel, 0x6c); ++ data->f2x78 = read_config32_dct(dev_fn2, node, channel, 0x78); ++ data->f2x7c = read_config32_dct(dev_fn2, node, channel, 0x7c); ++ data->f2x80 = read_config32_dct(dev_fn2, node, channel, 0x80); ++ data->f2x84 = read_config32_dct(dev_fn2, node, channel, 0x84); ++ data->f2x88 = read_config32_dct(dev_fn2, node, channel, 0x88); ++ data->f2x8c = read_config32_dct(dev_fn2, node, channel, 0x8c); ++ data->f2x90 = read_config32_dct(dev_fn2, node, channel, 0x90); ++ data->f2xa4 = read_config32_dct(dev_fn2, node, channel, 0xa4); ++ 
data->f2xa8 = read_config32_dct(dev_fn2, node, channel, 0xa8); ++ ++ /* Family 15h-specific configuration */ ++ if (is_fam15h()) { ++ data->f2x200 = read_config32_dct(dev_fn2, node, channel, 0x200); ++ data->f2x204 = read_config32_dct(dev_fn2, node, channel, 0x204); ++ data->f2x208 = read_config32_dct(dev_fn2, node, channel, 0x208); ++ data->f2x20c = read_config32_dct(dev_fn2, node, channel, 0x20c); ++ for (i=0; i<4; i++) ++ data->f2x210[i] = read_config32_dct_nbpstate(dev_fn2, node, channel, i, 0x210); ++ data->f2x214 = read_config32_dct(dev_fn2, node, channel, 0x214); ++ data->f2x218 = read_config32_dct(dev_fn2, node, channel, 0x218); ++ data->f2x21c = read_config32_dct(dev_fn2, node, channel, 0x21c); ++ data->f2x22c = read_config32_dct(dev_fn2, node, channel, 0x22c); ++ data->f2x230 = read_config32_dct(dev_fn2, node, channel, 0x230); ++ data->f2x234 = read_config32_dct(dev_fn2, node, channel, 0x234); ++ data->f2x238 = read_config32_dct(dev_fn2, node, channel, 0x238); ++ data->f2x23c = read_config32_dct(dev_fn2, node, channel, 0x23c); ++ data->f2x240 = read_config32_dct(dev_fn2, node, channel, 0x240); ++ ++ data->f2x9cx0d0fe003 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0fe003); ++ data->f2x9cx0d0fe013 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0fe013); ++ for (i=0; i<9; i++) ++ data->f2x9cx0d0f0_8_0_1f[i] = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f001f | (i << 8)); ++ data->f2x9cx0d0f201f = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f201f); ++ data->f2x9cx0d0f211f = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f211f); ++ data->f2x9cx0d0f221f = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f221f); ++ data->f2x9cx0d0f801f = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f801f); ++ data->f2x9cx0d0f811f = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f811f); ++ data->f2x9cx0d0f821f = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f821f); ++ data->f2x9cx0d0fc01f = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0fc01f); ++ data->f2x9cx0d0fc11f = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0fc11f); ++ data->f2x9cx0d0fc21f = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0fc21f); ++ data->f2x9cx0d0f4009 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f4009); ++ for (i=0; i<9; i++) ++ data->f2x9cx0d0f0_8_0_02[i] = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f0002 | (i << 8)); ++ for (i=0; i<9; i++) ++ data->f2x9cx0d0f0_8_0_06[i] = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f0006 | (i << 8)); ++ for (i=0; i<9; i++) ++ data->f2x9cx0d0f0_8_0_0a[i] = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f000a | (i << 8)); ++ ++ data->f2x9cx0d0f2002 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f2002); ++ data->f2x9cx0d0f2102 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f2102); ++ data->f2x9cx0d0f2202 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f2202); ++ data->f2x9cx0d0f8002 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f8002); ++ data->f2x9cx0d0f8006 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f8006); ++ data->f2x9cx0d0f800a = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f800a); ++ data->f2x9cx0d0f8102 = 
read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f8102); ++ data->f2x9cx0d0f8106 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f8106); ++ data->f2x9cx0d0f810a = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f810a); ++ data->f2x9cx0d0fc002 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0fc002); ++ data->f2x9cx0d0fc006 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0fc006); ++ data->f2x9cx0d0fc00a = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0fc00a); ++ data->f2x9cx0d0fc00e = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0fc00e); ++ data->f2x9cx0d0fc012 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0fc012); ++ ++ data->f2x9cx0d0f2031 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f2031); ++ data->f2x9cx0d0f2131 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f2131); ++ data->f2x9cx0d0f2231 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f2231); ++ data->f2x9cx0d0f8031 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f8031); ++ data->f2x9cx0d0f8131 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f8131); ++ data->f2x9cx0d0f8231 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f8231); ++ data->f2x9cx0d0fc031 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0fc031); ++ data->f2x9cx0d0fc131 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0fc131); ++ data->f2x9cx0d0fc231 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0fc231); ++ for (i=0; i<9; i++) ++ data->f2x9cx0d0f0_0_f_31[i] = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f0031 | (i << 8)); ++ ++ data->f2x9cx0d0f8021 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f8021); ++ } + + /* Stage 4 */ +- data->f2x94 = pci_read_config32(dev_fn2, 0x94 + (0x100 * channel)); ++ data->f2x94 = read_config32_dct(dev_fn2, node, channel, 0x94); + + /* Stage 6 */ + for (i=0; i<9; i++) + for (j=0; j<3; j++) +- data->f2x9cx0d0f0_f_8_0_0_8_4_0[i][j] = read_amd_dct_index_register(dev_fn2, 0x98 + (0x100 * channel), 0x0d0f0000 | (i << 8) | (j * 4)); +- data->f2x9cx00 = read_amd_dct_index_register(dev_fn2, 0x98 + (0x100 * channel), 0x00); +- data->f2x9cx0a = read_amd_dct_index_register(dev_fn2, 0x98 + (0x100 * channel), 0x0a); +- data->f2x9cx0c = read_amd_dct_index_register(dev_fn2, 0x98 + (0x100 * channel), 0x0c); ++ data->f2x9cx0d0f0_f_8_0_0_8_4_0[i][j] = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f0000 | (i << 8) | (j * 4)); ++ data->f2x9cx00 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x00); ++ data->f2x9cx0a = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0a); ++ data->f2x9cx0c = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0c); + + /* Stage 7 */ +- data->f2x9cx04 = read_amd_dct_index_register(dev_fn2, 0x98 + (0x100 * channel), 0x04); ++ data->f2x9cx04 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x04); + + /* Stage 9 */ +- data->f2x9cx0d0fe006 = read_amd_dct_index_register(dev_fn2, 0x98 + (0x100 * channel), 0x0d0fe006); +- data->f2x9cx0d0fe007 = read_amd_dct_index_register(dev_fn2, 0x98 + (0x100 * channel), 0x0d0fe007); ++ data->f2x9cx0d0fe006 = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0fe006); ++ data->f2x9cx0d0fe007 = 
read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0fe007); + + /* Stage 10 */ + for (i=0; i<12; i++) +- data->f2x9cx10[i] = read_amd_dct_index_register(dev_fn2, 0x98 + (0x100 * channel), 0x10 + i); ++ data->f2x9cx10[i] = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x10 + i); + for (i=0; i<12; i++) +- data->f2x9cx20[i] = read_amd_dct_index_register(dev_fn2, 0x98 + (0x100 * channel), 0x20 + i); ++ data->f2x9cx20[i] = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x20 + i); + for (i=0; i<4; i++) + for (j=0; j<3; j++) +- data->f2x9cx3_0_0_3_1[i][j] = read_amd_dct_index_register(dev_fn2, 0x98 + (0x100 * channel), (0x01 + i) + (0x100 * j)); ++ data->f2x9cx3_0_0_3_1[i][j] = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, (0x01 + i) + (0x100 * j)); + for (i=0; i<4; i++) + for (j=0; j<3; j++) +- data->f2x9cx3_0_0_7_5[i][j] = read_amd_dct_index_register(dev_fn2, 0x98 + (0x100 * channel), (0x05 + i) + (0x100 * j)); +- data->f2x9cx0d = read_amd_dct_index_register(dev_fn2, 0x98 + (0x100 * channel), 0x0d); ++ data->f2x9cx3_0_0_7_5[i][j] = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, (0x05 + i) + (0x100 * j)); ++ data->f2x9cx0d = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d); + for (i=0; i<9; i++) +- data->f2x9cx0d0f0_f_0_13[i] = read_amd_dct_index_register(dev_fn2, 0x98 + (0x100 * channel), 0x0d0f0013 | (i << 8)); ++ data->f2x9cx0d0f0_f_0_13[i] = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f0013 | (i << 8)); + for (i=0; i<9; i++) +- data->f2x9cx0d0f0_f_0_30[i] = read_amd_dct_index_register(dev_fn2, 0x98 + (0x100 * channel), 0x0d0f0030 | (i << 8)); ++ data->f2x9cx0d0f0_f_0_30[i] = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f0030 | (i << 8)); + for (i=0; i<4; i++) +- data->f2x9cx0d0f2_f_0_30[i] = read_amd_dct_index_register(dev_fn2, 0x98 + (0x100 * channel), 0x0d0f2030 | (i << 8)); ++ data->f2x9cx0d0f2_f_0_30[i] = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f2030 | (i << 8)); + for (i=0; i<2; i++) + for (j=0; j<3; j++) +- data->f2x9cx0d0f8_8_4_0[i][j] = read_amd_dct_index_register(dev_fn2, 0x98 + (0x100 * channel), 0x0d0f0000 | (i << 8) | (j * 4)); +- data->f2x9cx0d0f812f = read_amd_dct_index_register(dev_fn2, 0x98 + (0x100 * channel), 0x0d0f812f); ++ data->f2x9cx0d0f8_8_4_0[i][j] = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f0000 | (i << 8) | (j * 4)); ++ data->f2x9cx0d0f812f = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x0d0f812f); + + /* Stage 11 */ + if (IS_ENABLED(CONFIG_DIMM_DDR3)) { + for (i=0; i<12; i++) +- data->f2x9cx30[i] = read_amd_dct_index_register(dev_fn2, 0x98 + (0x100 * channel), 0x30 + i); ++ data->f2x9cx30[i] = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x30 + i); + for (i=0; i<12; i++) +- data->f2x9cx40[i] = read_amd_dct_index_register(dev_fn2, 0x98 + (0x100 * channel), 0x40 + i); ++ data->f2x9cx40[i] = read_amd_dct_index_register_dct(dev_fn2, node, channel, 0x98, 0x40 + i); + } + + /* Other */ +@@ -212,6 +364,43 @@ void copy_mct_data_to_save_variable(struct amd_s3_persistent_data* persistent_da + } + } + #else ++static void write_config32_dct(device_t dev, uint8_t node, uint8_t dct, uint32_t reg, uint32_t value) { ++ if (is_fam15h()) { ++ uint32_t dword; ++ device_t dev_fn1 = PCI_DEV(0, 0x18 + node, 1); ++ ++ /* Select DCT */ ++ dword = pci_read_config32(dev_fn1, 0x10c); ++ dword &= ~0x1; ++ dword |= (dct & 0x1); ++ pci_write_config32(dev_fn1, 0x10c, 
dword); ++ } else { ++ /* Apply offset */ ++ reg += dct * 0x100; ++ } ++ ++ pci_write_config32(dev, reg, value); ++} ++ ++static void write_config32_dct_nbpstate(device_t dev, uint8_t node, uint8_t dct, uint8_t nb_pstate, uint32_t reg, uint32_t value) { ++ uint32_t dword; ++ device_t dev_fn1 = PCI_DEV(0, 0x18 + node, 1); ++ ++ /* Select DCT */ ++ dword = pci_read_config32(dev_fn1, 0x10c); ++ dword &= ~0x1; ++ dword |= (dct & 0x1); ++ pci_write_config32(dev_fn1, 0x10c, dword); ++ ++ /* Select NB Pstate index */ ++ dword = pci_read_config32(dev_fn1, 0x10c); ++ dword &= ~(0x3 << 4); ++ dword |= (nb_pstate & 0x3) << 4; ++ pci_write_config32(dev_fn1, 0x10c, dword); ++ ++ pci_write_config32(dev, reg, value); ++} ++ + static void write_amd_dct_index_register(device_t dev, uint32_t index_ctl_reg, uint32_t index, uint32_t value) + { + uint32_t dword; +@@ -223,6 +412,25 @@ static void write_amd_dct_index_register(device_t dev, uint32_t index_ctl_reg, u + dword = pci_read_config32(dev, index_ctl_reg); + } while (!(dword & (1 << 31))); + } ++ ++static void write_amd_dct_index_register_dct(device_t dev, uint8_t node, uint8_t dct, uint32_t index_ctl_reg, uint32_t index, uint32_t value) ++{ ++ if (is_fam15h()) { ++ uint32_t dword; ++ device_t dev_fn1 = PCI_DEV(0, 0x18 + node, 1); ++ ++ /* Select DCT */ ++ dword = pci_read_config32(dev_fn1, 0x10c); ++ dword &= ~0x1; ++ dword |= (dct & 0x1); ++ pci_write_config32(dev_fn1, 0x10c, dword); ++ } else { ++ /* Apply offset */ ++ index_ctl_reg += dct * 0x100; ++ } ++ ++ return write_amd_dct_index_register(dev, index_ctl_reg, index, value); ++} + #endif + + #ifdef __PRE_RAM__ +@@ -262,31 +470,31 @@ void restore_mct_data_from_save_variable(struct amd_s3_persistent_data* persiste + if (!persistent_data->node[node].node_present) + continue; + +- pci_write_config32(PCI_DEV(0, 0x18 + node, 1), 0x40 + (0x100 * channel), data->f1x40); +- pci_write_config32(PCI_DEV(0, 0x18 + node, 1), 0x44 + (0x100 * channel), data->f1x44); +- pci_write_config32(PCI_DEV(0, 0x18 + node, 1), 0x48 + (0x100 * channel), data->f1x48); +- pci_write_config32(PCI_DEV(0, 0x18 + node, 1), 0x4c + (0x100 * channel), data->f1x4c); +- pci_write_config32(PCI_DEV(0, 0x18 + node, 1), 0x50 + (0x100 * channel), data->f1x50); +- pci_write_config32(PCI_DEV(0, 0x18 + node, 1), 0x54 + (0x100 * channel), data->f1x54); +- pci_write_config32(PCI_DEV(0, 0x18 + node, 1), 0x58 + (0x100 * channel), data->f1x58); +- pci_write_config32(PCI_DEV(0, 0x18 + node, 1), 0x5c + (0x100 * channel), data->f1x5c); +- pci_write_config32(PCI_DEV(0, 0x18 + node, 1), 0x60 + (0x100 * channel), data->f1x60); +- pci_write_config32(PCI_DEV(0, 0x18 + node, 1), 0x64 + (0x100 * channel), data->f1x64); +- pci_write_config32(PCI_DEV(0, 0x18 + node, 1), 0x68 + (0x100 * channel), data->f1x68); +- pci_write_config32(PCI_DEV(0, 0x18 + node, 1), 0x6c + (0x100 * channel), data->f1x6c); +- pci_write_config32(PCI_DEV(0, 0x18 + node, 1), 0x70 + (0x100 * channel), data->f1x70); +- pci_write_config32(PCI_DEV(0, 0x18 + node, 1), 0x74 + (0x100 * channel), data->f1x74); +- pci_write_config32(PCI_DEV(0, 0x18 + node, 1), 0x78 + (0x100 * channel), data->f1x78); +- pci_write_config32(PCI_DEV(0, 0x18 + node, 1), 0x7c + (0x100 * channel), data->f1x7c); +- pci_write_config32(PCI_DEV(0, 0x18 + node, 1), 0xf0 + (0x100 * channel), data->f1xf0); +- pci_write_config32(PCI_DEV(0, 0x18 + node, 1), 0x120 + (0x100 * channel), data->f1x120); +- pci_write_config32(PCI_DEV(0, 0x18 + node, 1), 0x124 + (0x100 * channel), data->f1x124); +- pci_write_config32(PCI_DEV(0, 0x18 + 
node, 2), 0x10c + (0x100 * channel), data->f2x10c); +- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x114 + (0x100 * channel), data->f2x114); +- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x118 + (0x100 * channel), data->f2x118); +- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x11c + (0x100 * channel), data->f2x11c); +- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x1b0 + (0x100 * channel), data->f2x1b0); +- pci_write_config32(PCI_DEV(0, 0x18 + node, 3), 0x44 + (0x100 * channel), data->f3x44); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 1), node, channel, 0x40, data->f1x40); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 1), node, channel, 0x44, data->f1x44); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 1), node, channel, 0x48, data->f1x48); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 1), node, channel, 0x4c, data->f1x4c); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 1), node, channel, 0x50, data->f1x50); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 1), node, channel, 0x54, data->f1x54); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 1), node, channel, 0x58, data->f1x58); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 1), node, channel, 0x5c, data->f1x5c); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 1), node, channel, 0x60, data->f1x60); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 1), node, channel, 0x64, data->f1x64); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 1), node, channel, 0x68, data->f1x68); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 1), node, channel, 0x6c, data->f1x6c); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 1), node, channel, 0x70, data->f1x70); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 1), node, channel, 0x74, data->f1x74); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 1), node, channel, 0x78, data->f1x78); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 1), node, channel, 0x7c, data->f1x7c); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 1), node, channel, 0xf0, data->f1xf0); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 1), node, channel, 0x120, data->f1x120); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 1), node, channel, 0x124, data->f1x124); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x10c, data->f2x10c); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x114, data->f2x114); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x118, data->f2x118); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x11c, data->f2x11c); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x1b0, data->f2x1b0); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 3), node, channel, 0x44, data->f3x44); + for (i=0; i<16; i++) { + wrmsr_uint64_t(0x00000200 | i, data->msr0000020[i]); + } +@@ -313,31 +521,97 @@ void restore_mct_data_from_save_variable(struct amd_s3_persistent_data* persiste + if (!persistent_data->node[node].node_present) + continue; + +- ganged = !!(data->f2x110 & 0x10); ++ if (is_fam15h()) ++ ganged = 0; ++ else ++ ganged = !!(data->f2x110 & 0x10); + if ((ganged == 1) && (channel > 0)) + continue; + +- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x40 + (0x100 * channel), data->f2x40); +- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x44 + (0x100 * channel), data->f2x44); +- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x48 + (0x100 * channel), data->f2x48); +- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x4c + (0x100 * channel), data->f2x4c); +- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x50 + (0x100 * channel), data->f2x50); +- pci_write_config32(PCI_DEV(0, 
0x18 + node, 2), 0x54 + (0x100 * channel), data->f2x54); +- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x58 + (0x100 * channel), data->f2x58); +- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x5c + (0x100 * channel), data->f2x5c); +- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x60 + (0x100 * channel), data->f2x60); +- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x64 + (0x100 * channel), data->f2x64); +- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x68 + (0x100 * channel), data->f2x68); +- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x6c + (0x100 * channel), data->f2x6c); +- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x78 + (0x100 * channel), data->f2x78); +- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x7c + (0x100 * channel), data->f2x7c); +- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x80 + (0x100 * channel), data->f2x80); +- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x84 + (0x100 * channel), data->f2x84); +- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x88 + (0x100 * channel), data->f2x88); +- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x8c + (0x100 * channel), data->f2x8c); +- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x90 + (0x100 * channel), data->f2x90); +- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0xa4 + (0x100 * channel), data->f2xa4); +- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0xa8 + (0x100 * channel), data->f2xa8); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x40, data->f2x40); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x44, data->f2x44); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x48, data->f2x48); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x4c, data->f2x4c); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x50, data->f2x50); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x54, data->f2x54); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x58, data->f2x58); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x5c, data->f2x5c); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x60, data->f2x60); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x64, data->f2x64); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x68, data->f2x68); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x6c, data->f2x6c); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x78, data->f2x78); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x7c, data->f2x7c); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x80, data->f2x80); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x84, data->f2x84); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x88, data->f2x88); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x8c, data->f2x8c); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x90, data->f2x90); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0xa4, data->f2xa4); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0xa8, data->f2xa8); ++ } ++ } ++ ++ /* Family 15h-specific configuration */ ++ if (is_fam15h()) { ++ for (node = 0; node < MAX_NODES_SUPPORTED; node++) { ++ for (channel = 0; channel < 2; channel++) { ++ struct amd_s3_persistent_mct_channel_data* data = &persistent_data->node[node].channel[channel]; ++ if (!persistent_data->node[node].node_present) ++ continue; ++ ++ /* 
Initialize DCT */ ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0000000b, 0x80000000); ++ dword = read_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fe013); ++ dword &= ~0xffff; ++ dword |= 0x118; ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fe013, dword); ++ ++ /* Restore values */ ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x200, data->f2x200); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x204, data->f2x204); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x208, data->f2x208); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x20c, data->f2x20c); ++ for (i=0; i<4; i++) ++ write_config32_dct_nbpstate(PCI_DEV(0, 0x18 + node, 2), node, channel, i, 0x210, data->f2x210[i]); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x214, data->f2x214); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x218, data->f2x218); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x21c, data->f2x21c); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x22c, data->f2x22c); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x230, data->f2x230); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x234, data->f2x234); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x238, data->f2x238); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x23c, data->f2x23c); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x240, data->f2x240); ++ ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fe013, data->f2x9cx0d0fe013); ++ for (i=0; i<9; i++) ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f001f | (i << 8), data->f2x9cx0d0f0_8_0_1f[i]); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f201f, data->f2x9cx0d0f201f); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f211f, data->f2x9cx0d0f211f); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f221f, data->f2x9cx0d0f221f); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f801f, data->f2x9cx0d0f801f); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f811f, data->f2x9cx0d0f811f); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f821f, data->f2x9cx0d0f821f); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fc01f, data->f2x9cx0d0fc01f); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fc11f, data->f2x9cx0d0fc11f); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fc21f, data->f2x9cx0d0fc21f); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f4009, data->f2x9cx0d0f4009); ++ ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f2031, data->f2x9cx0d0f2031); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f2131, data->f2x9cx0d0f2131); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f2231, data->f2x9cx0d0f2231); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 
0x18 + node, 2), node, channel, 0x98, 0x0d0f8031, data->f2x9cx0d0f8031); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f8131, data->f2x9cx0d0f8131); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f8231, data->f2x9cx0d0f8231); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fc031, data->f2x9cx0d0fc031); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fc131, data->f2x9cx0d0fc131); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fc231, data->f2x9cx0d0fc231); ++ for (i=0; i<9; i++) ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f0031 | (i << 8), data->f2x9cx0d0f0_0_f_31[i]); ++ ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f8021, data->f2x9cx0d0f8021); ++ } + } + } + +@@ -348,33 +622,44 @@ void restore_mct_data_from_save_variable(struct amd_s3_persistent_data* persiste + if (!persistent_data->node[node].node_present) + continue; + +- ganged = !!(data->f2x110 & 0x10); ++ if (is_fam15h()) ++ ganged = 0; ++ else ++ ganged = !!(data->f2x110 & 0x10); + if ((ganged == 1) && (channel > 0)) + continue; + +- /* Disable PHY auto-compensation engine */ +- dword = read_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x08); +- if (!(dword & (1 << 30))) { +- dword |= (1 << 30); +- write_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x08, dword); +- +- /* Wait for 5us */ +- mct_Wait(100); ++ if (is_fam15h()) { ++ /* Program PllLockTime = 0x190 */ ++ dword = read_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fe006); ++ dword &= ~0xffff; ++ dword |= 0x190; ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fe006, dword); ++ ++ /* Program MemClkFreqVal = 0 */ ++ dword = read_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x94); ++ dword &= ~(0x1 << 7); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x94, dword); ++ ++ /* Restore DRAM Address/Timing Control Register */ ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x04, data->f2x9cx04); ++ } else { ++ /* Disable PHY auto-compensation engine */ ++ dword = read_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x08); ++ if (!(dword & (1 << 30))) { ++ dword |= (1 << 30); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x08, dword); ++ ++ /* Wait for 5us */ ++ mct_Wait(100); ++ } + } + + /* Restore DRAM Configuration High Register */ +- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x94 + (0x100 * channel), data->f2x94); +- +- /* Enable PHY auto-compensation engine */ +- dword = read_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x08); +- dword &= ~(1 << 30); +- write_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x08, dword); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x94, data->f2x94); + } + } + +- /* Wait for 750us */ +- mct_Wait(15000); +- + /* Stage 5 */ + for (node = 0; node < MAX_NODES_SUPPORTED; node++) { + for (channel = 0; channel < 2; channel++) { +@@ -382,17 +667,40 @@ void restore_mct_data_from_save_variable(struct amd_s3_persistent_data* persiste + if (!persistent_data->node[node].node_present) + continue; + +- 
ganged = !!(data->f2x110 & 0x10); ++ if (is_fam15h()) ++ ganged = 0; ++ else ++ ganged = !!(data->f2x110 & 0x10); + if ((ganged == 1) && (channel > 0)) + continue; + ++ dct_enabled = !(data->f2x94 & (1 << 14)); ++ if (!dct_enabled) ++ continue; ++ + /* Wait for any pending PHY frequency changes to complete */ + do { +- dword = read_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x08); ++ dword = read_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x94); + } while (dword & (1 << 21)); ++ ++ if (is_fam15h()) { ++ /* Program PllLockTime = 0xf */ ++ dword = read_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fe006); ++ dword &= ~0xffff; ++ dword |= 0xf; ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fe006, dword); ++ } else { ++ /* Enable PHY auto-compensation engine */ ++ dword = read_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x08); ++ dword &= ~(1 << 30); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x08, dword); ++ } + } + } + ++ /* Wait for 750us */ ++ mct_Wait(15000); ++ + /* Stage 6 */ + for (node = 0; node < MAX_NODES_SUPPORTED; node++) { + for (channel = 0; channel < 2; channel++) { +@@ -402,10 +710,49 @@ void restore_mct_data_from_save_variable(struct amd_s3_persistent_data* persiste + + for (i=0; i<9; i++) + for (j=0; j<3; j++) +- write_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x0d0f0000 | (i << 8) | (j * 4), data->f2x9cx0d0f0_f_8_0_0_8_4_0[i][j]); +- write_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x00, data->f2x9cx00); +- write_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x0a, data->f2x9cx0a); +- write_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x0c, data->f2x9cx0c); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f0000 | (i << 8) | (j * 4), data->f2x9cx0d0f0_f_8_0_0_8_4_0[i][j]); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x00, data->f2x9cx00); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0a, data->f2x9cx0a); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0c, data->f2x9cx0c); ++ } ++ } ++ ++ /* Family 15h-specific configuration */ ++ if (is_fam15h()) { ++ for (node = 0; node < MAX_NODES_SUPPORTED; node++) { ++ for (channel = 0; channel < 2; channel++) { ++ struct amd_s3_persistent_mct_channel_data* data = &persistent_data->node[node].channel[channel]; ++ if (!persistent_data->node[node].node_present) ++ continue; ++ ++ dword = read_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fe003); ++ dword |= (0x3 << 13); /* DisAutoComp, DisablePredriverCal = 1 */ ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fe003, dword); ++ ++ for (i=0; i<9; i++) ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f0006 | (i << 8), data->f2x9cx0d0f0_8_0_06[i]); ++ for (i=0; i<9; i++) ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f000a | (i << 8), data->f2x9cx0d0f0_8_0_0a[i]); ++ for (i=0; i<9; i++) ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f0002 | (i << 8), (0x8000 | data->f2x9cx0d0f0_8_0_02[i])); ++ ++ 
write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f8006, data->f2x9cx0d0f8006); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f800a, data->f2x9cx0d0f800a); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f8106, data->f2x9cx0d0f8106); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f810a, data->f2x9cx0d0f810a); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fc006, data->f2x9cx0d0fc006); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fc00a, data->f2x9cx0d0fc00a); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fc00e, data->f2x9cx0d0fc00e); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fc012, data->f2x9cx0d0fc012); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f8002, (0x8000 | data->f2x9cx0d0f8002)); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f8102, (0x8000 | data->f2x9cx0d0f8102)); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fc002, (0x8000 | data->f2x9cx0d0fc002)); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f2002, (0x8000 | data->f2x9cx0d0f2002)); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f2102, (0x8000 | data->f2x9cx0d0f2102)); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f2202, (0x8000 | data->f2x9cx0d0f2202)); ++ ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fe003, data->f2x9cx0d0fe003); ++ } + } + } + +@@ -416,11 +763,15 @@ void restore_mct_data_from_save_variable(struct amd_s3_persistent_data* persiste + if (!persistent_data->node[node].node_present) + continue; + +- ganged = !!(data->f2x110 & 0x10); ++ if (is_fam15h()) ++ ganged = 0; ++ else ++ ganged = !!(data->f2x110 & 0x10); + if ((ganged == 1) && (channel > 0)) + continue; + +- write_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x04, data->f2x9cx04); ++ if (!is_fam15h()) ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x04, data->f2x9cx04); + } + } + +@@ -435,16 +786,19 @@ void restore_mct_data_from_save_variable(struct amd_s3_persistent_data* persiste + if (!dct_enabled) + continue; + +- ganged = !!(data->f2x110 & 0x10); ++ if (is_fam15h()) ++ ganged = 0; ++ else ++ ganged = !!(data->f2x110 & 0x10); + if ((ganged == 1) && (channel > 0)) + continue; + + printk(BIOS_SPEW, "Taking DIMMs out of self refresh node: %d channel: %d\n", node, channel); + + /* Exit self refresh mode */ +- dword = pci_read_config32(PCI_DEV(0, 0x18 + node, 2), 0x90 + (0x100 * channel)); ++ dword = read_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x90); + dword |= (1 << 1); +- pci_write_config32(PCI_DEV(0, 0x18 + node, 2), 0x90 + (0x100 * channel), dword); ++ write_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x90, dword); + } + } + +@@ -463,12 +817,12 @@ void restore_mct_data_from_save_variable(struct amd_s3_persistent_data* persiste + + /* Wait for transition from self refresh mode to complete */ + do { +- dword = pci_read_config32(PCI_DEV(0, 0x18 + node, 2), 0x90 + (0x100 * channel)); ++ dword = 
read_config32_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x90); + } while (dword & (1 << 1)); + + /* Restore registers */ +- write_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x0d0fe006, data->f2x9cx0d0fe006); +- write_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x0d0fe007, data->f2x9cx0d0fe007); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fe006, data->f2x9cx0d0fe006); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0fe007, data->f2x9cx0d0fe007); + } + } + +@@ -480,26 +834,26 @@ void restore_mct_data_from_save_variable(struct amd_s3_persistent_data* persiste + continue; + + for (i=0; i<12; i++) +- write_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x10 + i, data->f2x9cx10[i]); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x10 + i, data->f2x9cx10[i]); + for (i=0; i<12; i++) +- write_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x20 + i, data->f2x9cx20[i]); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x20 + i, data->f2x9cx20[i]); + for (i=0; i<4; i++) + for (j=0; j<3; j++) +- write_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), (0x01 + i) + (0x100 * j), data->f2x9cx3_0_0_3_1[i][j]); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, (0x01 + i) + (0x100 * j), data->f2x9cx3_0_0_3_1[i][j]); + for (i=0; i<4; i++) + for (j=0; j<3; j++) +- write_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), (0x05 + i) + (0x100 * j), data->f2x9cx3_0_0_7_5[i][j]); +- write_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x0d, data->f2x9cx0d); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, (0x05 + i) + (0x100 * j), data->f2x9cx3_0_0_7_5[i][j]); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d, data->f2x9cx0d); + for (i=0; i<9; i++) +- write_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x0d0f0013 | (i << 8), data->f2x9cx0d0f0_f_0_13[i]); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f0013 | (i << 8), data->f2x9cx0d0f0_f_0_13[i]); + for (i=0; i<9; i++) +- write_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x0d0f0030 | (i << 8), data->f2x9cx0d0f0_f_0_30[i]); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f0030 | (i << 8), data->f2x9cx0d0f0_f_0_30[i]); + for (i=0; i<4; i++) +- write_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x0d0f2030 | (i << 8), data->f2x9cx0d0f2_f_0_30[i]); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f2030 | (i << 8), data->f2x9cx0d0f2_f_0_30[i]); + for (i=0; i<2; i++) + for (j=0; j<3; j++) +- write_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x0d0f0000 | (i << 8) | (j * 4), data->f2x9cx0d0f8_8_4_0[i][j]); +- write_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x0d0f812f, data->f2x9cx0d0f812f); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x0d0f0000 | (i << 8) | (j * 4), data->f2x9cx0d0f8_8_4_0[i][j]); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, 
channel, 0x98, 0x0d0f812f, data->f2x9cx0d0f812f); + } + } + +@@ -512,9 +866,9 @@ void restore_mct_data_from_save_variable(struct amd_s3_persistent_data* persiste + continue; + + for (i=0; i<12; i++) +- write_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x30 + i, data->f2x9cx30[i]); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x30 + i, data->f2x9cx30[i]); + for (i=0; i<12; i++) +- write_amd_dct_index_register(PCI_DEV(0, 0x18 + node, 2), 0x98 + (0x100 * channel), 0x40 + i, data->f2x9cx40[i]); ++ write_amd_dct_index_register_dct(PCI_DEV(0, 0x18 + node, 2), node, channel, 0x98, 0x40 + i, data->f2x9cx40[i]); + } + } + } +diff --git a/src/northbridge/amd/amdmct/wrappers/mcti.h b/src/northbridge/amd/amdmct/wrappers/mcti.h +index 38e66e1..2aba377 100644 +--- a/src/northbridge/amd/amdmct/wrappers/mcti.h ++++ b/src/northbridge/amd/amdmct/wrappers/mcti.h +@@ -2,6 +2,7 @@ + * This file is part of the coreboot project. + * + * Copyright (C) 2007 Advanced Micro Devices, Inc. ++ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -62,10 +63,15 @@ UPDATE AS NEEDED + #endif + + #ifndef MEM_MAX_LOAD_FREQ +-#if (CONFIG_DIMM_SUPPORT & 0x000F)==0x0005 /* AMD_FAM10_DDR3 */ +- #define MEM_MAX_LOAD_FREQ 800 +-#else +- #define MEM_MAX_LOAD_FREQ 400 ++#if (CONFIG_DIMM_SUPPORT & 0x000F)==0x0005 /* AMD_FAM10_DDR3 */ ++ #define MEM_MAX_LOAD_FREQ 933 ++ #define MEM_MIN_PLATFORM_FREQ_FAM10 400 ++ #define MEM_MIN_PLATFORM_FREQ_FAM15 333 ++#else /* AMD_FAM10_DDR2 */ ++ #define MEM_MAX_LOAD_FREQ 400 ++ #define MEM_MIN_PLATFORM_FREQ_FAM10 200 ++ /* DDR2 not available on Family 15h */ ++ #define MEM_MIN_PLATFORM_FREQ_FAM15 0 + #endif + #endif + +diff --git a/src/northbridge/amd/amdmct/wrappers/mcti_d.c b/src/northbridge/amd/amdmct/wrappers/mcti_d.c +index 47260f2..1d4eade 100644 +--- a/src/northbridge/amd/amdmct/wrappers/mcti_d.c ++++ b/src/northbridge/amd/amdmct/wrappers/mcti_d.c +@@ -44,7 +44,7 @@ + #define MINIMUM_DRAM_BELOW_4G 0x1000000 + + static const uint16_t ddr2_limits[4] = {400, 333, 266, 200}; +-static const uint16_t ddr3_limits[4] = {800, 666, 533, 400}; ++static const uint16_t ddr3_limits[16] = {933, 800, 666, 533, 400, 333, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + + static u16 mctGet_NVbits(u8 index) + { +@@ -81,12 +81,19 @@ static u16 mctGet_NVbits(u8 index) + if (get_option(&nvram, "max_mem_clock") == CB_SUCCESS) { + int limit = val; + if (IS_ENABLED(CONFIG_DIMM_DDR3)) +- limit = ddr3_limits[nvram & 3]; ++ limit = ddr3_limits[nvram & 0xf]; + else if (IS_ENABLED(CONFIG_DIMM_DDR2)) +- limit = ddr2_limits[nvram & 3]; ++ limit = ddr2_limits[nvram & 0x3]; + val = min(limit, val); + } + break; ++ case NV_MIN_MEMCLK: ++ /* Minimum platform supported memclk */ ++ if (is_fam15h()) ++ val = MEM_MIN_PLATFORM_FREQ_FAM15; ++ else ++ val = MEM_MIN_PLATFORM_FREQ_FAM10; ++ break; + case NV_ECC_CAP: + #if SYSTEM_TYPE == SERVER + val = 1; /* memory bus ECC capable */ +@@ -254,6 +261,9 @@ static u16 mctGet_NVbits(u8 index) + case NV_L2BKScrub: + val = 0; /* Disabled - See L2Scrub in BKDG */ + break; ++ case NV_L3BKScrub: ++ val = 0; /* Disabled - See L3Scrub in BKDG */ ++ break; + case NV_DCBKScrub: + val = 0; /* Disabled - See DcacheScrub in BKDG */ + break; +@@ -299,10 +309,14 @@ static void mctGet_MaxLoadFreq(struct DCTStatStruc *pDCTstat) + pDCTstat->PresetmaxFreq = 
mctGet_NVbits(NV_MAX_MEMCLK); + + /* Determine the number of installed DIMMs */ ++ uint8_t dimm; + int ch1_count = 0; + int ch2_count = 0; + uint8_t ch1_registered = 0; + uint8_t ch2_registered = 0; ++ uint8_t ch1_voltage = 0; ++ uint8_t ch2_voltage = 0; ++ uint8_t highest_rank_count[2]; + int i; + for (i = 0; i < 15; i = i + 2) { + if (pDCTstat->DIMMValid & (1 << i)) +@@ -321,8 +335,26 @@ static void mctGet_MaxLoadFreq(struct DCTStatStruc *pDCTstat) + printk(BIOS_DEBUG, "mctGet_MaxLoadFreq: Channel 2: %d DIMM(s) detected\n", ch2_count); + } + ++#if (CONFIG_DIMM_SUPPORT & 0x000F)==0x0005 /* AMD_FAM10_DDR3 */ ++ for (i = 0; i < 15; i = i + 2) { ++ if (pDCTstat->DIMMValid & (1 << i)) ++ ch1_voltage |= pDCTstat->DimmConfiguredVoltage[i]; ++ if (pDCTstat->DIMMValid & (1 << (i + 1))) ++ ch2_voltage |= pDCTstat->DimmConfiguredVoltage[i + 1]; ++ } ++#endif ++ ++ for (i = 0; i < 2; i++) { ++ sDCTStruct *pDCTData = pDCTstat->C_DCTPtr[i]; ++ highest_rank_count[i] = 0x0; ++ for (dimm = 0; dimm < 8; dimm++) { ++ if (pDCTData->DimmRanks[dimm] > highest_rank_count[i]) ++ highest_rank_count[i] = pDCTData->DimmRanks[dimm]; ++ } ++ } ++ + /* Set limits if needed */ +- pDCTstat->PresetmaxFreq = mct_MaxLoadFreq(max(ch1_count, ch2_count), (ch1_registered || ch2_registered), pDCTstat->PresetmaxFreq); ++ pDCTstat->PresetmaxFreq = mct_MaxLoadFreq(max(ch1_count, ch2_count), max(highest_rank_count[0], highest_rank_count[1]), (ch1_registered || ch2_registered), (ch1_voltage | ch2_voltage), pDCTstat->PresetmaxFreq); + } + + #ifdef UNUSED_CODE +@@ -486,7 +518,7 @@ static void mctHookAfterAnyTraining(void) + { + } + +-static u32 mctGetLogicalCPUID_D(u8 node) ++static uint64_t mctGetLogicalCPUID_D(u8 node) + { + return mctGetLogicalCPUID(node); + } +-- +1.9.1 + |
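Note on the register-access change running through the hunks above: on Family 10h each DRAM controller (DCT) is reached by adding 0x100 to the Function 1/2 register offset, whereas on Family 15h the patch first programs the DCT Configuration Select register (F1x10C), where bit 0 picks the DCT and bits 5:4 pick the NB P-state, and then accesses the register at its base offset. The wrappers read_config32_dct(), write_config32_dct(), the *_nbpstate variants, and read/write_amd_dct_index_register_dct() all implement this select-then-access pattern. The fragment below is a minimal standalone sketch of that pattern only, not code from the patch or from coreboot: the pci_read32()/pci_write32() helpers and the flat fake_cfg[] array are invented stand-ins so the sketch compiles and runs on its own, and it does not model separate per-DCT register banks.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in config space and accessors (hypothetical; coreboot uses its real
 * PCI config accessors and distinct function 1/2 devices per node). */
static uint32_t fake_cfg[0x100];
static uint32_t pci_read32(uint32_t reg) { return fake_cfg[reg >> 2]; }
static void pci_write32(uint32_t reg, uint32_t val) { fake_cfg[reg >> 2] = val; }

static int is_fam15h(void) { return 1; }	/* assume Family 15h for the demo */

/* Select-then-access, mirroring read_config32_dct() from the patch:
 * Family 15h: choose the DCT via F1x10C bit 0, then read the base offset.
 * Family 10h: fall back to the classic reg + dct * 0x100 mirror. */
static uint32_t read_config32_dct(uint32_t reg, uint8_t dct)
{
	if (is_fam15h()) {
		uint32_t dword = pci_read32(0x10c);	/* F1x10C DCT Configuration Select */
		dword &= ~0x1;				/* bit 0: DCT select */
		dword |= (dct & 0x1);
		pci_write32(0x10c, dword);
	} else {
		reg += dct * 0x100;			/* pre-Fam15h register mirror */
	}
	return pci_read32(reg);
}

int main(void)
{
	fake_cfg[0x94 >> 2] = 0x0000a28d;	/* pretend DRAM Configuration High value */
	printf("F2x94 via DCT1 select = 0x%08" PRIx32 "\n", read_config32_dct(0x94, 1));
	return 0;
}

The *_nbpstate variants in the patch extend the same idea by also programming bits 5:4 of F1x10C before the access, and the *_dct wrappers around the F2x98/F2x9C indexed DRAM PHY registers apply the identical DCT selection before the existing index handshake.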