diff --git a/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/.calculate_directory b/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/.calculate_directory
deleted file mode 100644
index 367380adf..000000000
--- a/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/.calculate_directory
+++ /dev/null
@@ -1 +0,0 @@
-# Calculate append=skip merge(sys-kernel/calculate-sources)=>5.4,5.5
diff --git a/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/10-calculate-x86_64 b/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/10-calculate-x86_64
deleted file mode 100644
index 43b4c8aaf..000000000
--- a/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/10-calculate-x86_64
+++ /dev/null
@@ -1,4266 +0,0 @@
-# Calculate format=kernel name=.config os_install_arch_machine==x86_64
-CONFIG_60XX_WDT=m
-CONFIG_8139CP=m
-CONFIG_8139_OLD_RX_RESET=y
-CONFIG_8139TOO_8129=y
-CONFIG_8139TOO=m
-CONFIG_8139TOO_TUNE_TWISTER=y
-CONFIG_842_COMPRESS=m
-CONFIG_842_DECOMPRESS=m
-CONFIG_88EU_AP_MODE=y
-# CONFIG_ABP060MG is not set
-CONFIG_AC97_BUS=m
-CONFIG_ACENIC=m
-CONFIG_ACENIC_OMIT_TIGON_I=y
-CONFIG_ACERHDF=m
-CONFIG_ACER_WIRELESS=m
-CONFIG_ACER_WMI=m
-# CONFIG_ACORN_PARTITION is not set
-CONFIG_ACPI_AC=m
-CONFIG_ACPI_ALS=m
-CONFIG_ACPI_BATTERY=m
-# CONFIG_ACPI_BGRT is not set
-CONFIG_ACPI_BUTTON=m
-CONFIG_ACPI_CMPC=m
-CONFIG_ACPI_FAN=m
-# CONFIG_ACPI_IPMI is not set
-CONFIG_ACPI_PROCESSOR_AGGREGATOR=m
-# CONFIG_ACPI_REDUCED_HARDWARE_ONLY is not set
-CONFIG_ACPI_SBS=m
-CONFIG_ACPI_TAD=m
-CONFIG_ACPI_THERMAL=m
-CONFIG_ACPI_THERMAL_REL=m
-CONFIG_ACPI_TOSHIBA=m
-CONFIG_ACPI_VIDEO=m
-CONFIG_ACPI_WMI=m
-CONFIG_ACQUIRE_WDT=m
-# CONFIG_ACRN_GUEST is not set
-# CONFIG_AD5064 is not set
-# CONFIG_AD5272 is not set
-# CONFIG_AD5380 is not set
-# CONFIG_AD5446 is not set
-# CONFIG_AD5593R is not set
-# CONFIG_AD5696_I2C is not set
-# CONFIG_AD5933 is not set
-# CONFIG_AD7150 is not set
-# CONFIG_AD7291 is not set
-# CONFIG_AD7606_IFACE_PARALLEL is not set
-# CONFIG_AD7746 is not set
-# CONFIG_AD799X is not set
-CONFIG_ADAPTEC_STARFIRE=m
-# CONFIG_ADE7854 is not set
-# CONFIG_ADJD_S311 is not set
-CONFIG_ADM8211=m
-CONFIG_ADVANTECH_WDT=m
-# CONFIG_ADXL345_I2C is not set
-# CONFIG_ADXL372_I2C is not set
-# CONFIG_AFE4404 is not set
-CONFIG_AGP_AMD64=m
-CONFIG_AGP_INTEL=m
-CONFIG_AGP_SIS=m
-CONFIG_AGP_VIA=m
-CONFIG_AIC79XX_CMDS_PER_DEVICE=32
-# CONFIG_AIC79XX_DEBUG_ENABLE is not set
-CONFIG_AIC79XX_DEBUG_MASK=0
-CONFIG_AIC79XX_REG_PRETTY_PRINT=y
-CONFIG_AIC79XX_RESET_DELAY_MS=5000
-CONFIG_AIC7XXX_CMDS_PER_DEVICE=32
-# CONFIG_AIC7XXX_DEBUG_ENABLE is not set
-CONFIG_AIC7XXX_DEBUG_MASK=0
-# CONFIG_AIC7XXX_REG_PRETTY_PRINT is not set
-CONFIG_AIC7XXX_RESET_DELAY_MS=5000
-# CONFIG_AIC94XX_DEBUG is not set
-CONFIG_AIRO_CS=m
-CONFIG_AIRO=m
-# CONFIG_AIX_PARTITION is not set
-# CONFIG_AL3320A is not set
-CONFIG_ALIENWARE_WMI=m
-CONFIG_ALIM1535_WDT=m
-CONFIG_ALIM7101_WDT=m
-CONFIG_ALTERA_MSGDMA=m
-CONFIG_ALTERA_STAPL=m
-CONFIG_ALTERA_TSE=m
-CONFIG_ALX=m
-CONFIG_AM2315=m
-CONFIG_AMD8111_ETH=m
-CONFIG_AMD_IOMMU_V2=m
-# CONFIG_AMD_NUMA is not set
-CONFIG_AMD_PHY=m
-CONFIG_AMD_XGBE_HAVE_ECC=y
-CONFIG_AMD_XGBE=m
-# CONFIG_AMIGA_PARTITION is not set
-CONFIG_AMILO_RFKILL=m
-# CONFIG_APDS9300 is not set
-# CONFIG_APDS9960 is not set
-CONFIG_APPLE_GMUX=m
-CONFIG_AQTION=m
-CONFIG_AR5523=m
-CONFIG_ARCH_CPUIDLE_HALTPOLL=y
-CONFIG_ASUS_LAPTOP=m
-CONFIG_ASUS_NB_WMI=m
-CONFIG_ASUS_WIRELESS=m
-CONFIG_ASUS_WMI=m
-CONFIG_ASYNC_CORE=m -CONFIG_ASYNC_MEMCPY=m -CONFIG_ASYNC_PQ=m -CONFIG_ASYNC_RAID6_RECOV=m -# CONFIG_ASYNC_RAID6_TEST is not set -CONFIG_ASYNC_XOR=m -CONFIG_AT76C50X_USB=m -CONFIG_AT803X_PHY=m -CONFIG_ATA_GENERIC=m -CONFIG_ATA_PIIX=m -# CONFIG_ATARI_PARTITION is not set -# CONFIG_ATA_VERBOSE_ERROR is not set -CONFIG_ATH10K_CE=y -# CONFIG_ATH10K_DEBUGFS is not set -# CONFIG_ATH10K_DEBUG is not set -CONFIG_ATH10K=m -CONFIG_ATH10K_PCI=m -CONFIG_ATH10K_SDIO=m -CONFIG_ATH10K_USB=m -# CONFIG_ATH5K_DEBUG is not set -CONFIG_ATH5K=m -CONFIG_ATH5K_PCI=y -# CONFIG_ATH6KL_DEBUG is not set -CONFIG_ATH6KL=m -CONFIG_ATH6KL_SDIO=m -CONFIG_ATH6KL_USB=m -CONFIG_ATH9K_AHB=y -CONFIG_ATH9K_BTCOEX_SUPPORT=y -CONFIG_ATH9K_CHANNEL_CONTEXT=y -CONFIG_ATH9K_COMMON=m -# CONFIG_ATH9K_DEBUGFS is not set -CONFIG_ATH9K_DYNACK=y -# CONFIG_ATH9K_HTC_DEBUGFS is not set -CONFIG_ATH9K_HTC=m -CONFIG_ATH9K_HW=m -CONFIG_ATH9K_HWRNG=y -CONFIG_ATH9K=m -CONFIG_ATH9K_PCI_NO_EEPROM=m -CONFIG_ATH9K_PCI=y -CONFIG_ATH9K_PCOEM=y -CONFIG_ATH9K_RFKILL=y -# CONFIG_ATH9K_WOW is not set -CONFIG_ATH_COMMON=m -CONFIG_ATL1C=m -CONFIG_ATL1E=m -CONFIG_ATL1=m -CONFIG_ATL2=m -# CONFIG_ATLAS_PH_SENSOR is not set -CONFIG_ATMEL=m -CONFIG_ATP=m -CONFIG_AURORA_NB8800=m -# CONFIG_AXP20X_ADC is not set -# CONFIG_AXP20X_POWER is not set -CONFIG_AXP288_ADC=m -# CONFIG_AXP288_CHARGER is not set -# CONFIG_AXP288_FUEL_GAUGE is not set -CONFIG_B43_BCMA_PIO=y -CONFIG_B43_BCMA=y -CONFIG_B43_BUSES_BCMA_AND_SSB=y -# CONFIG_B43_BUSES_BCMA is not set -# CONFIG_B43_BUSES_SSB is not set -# CONFIG_B43_DEBUG is not set -CONFIG_B43_HWRNG=y -CONFIG_B43_LEDS=y -CONFIG_B43LEGACY_DEBUG=y -CONFIG_B43LEGACY_DMA_AND_PIO_MODE=y -# CONFIG_B43LEGACY_DMA_MODE is not set -CONFIG_B43LEGACY_DMA=y -CONFIG_B43LEGACY_HWRNG=y -CONFIG_B43LEGACY_LEDS=y -CONFIG_B43LEGACY=m -CONFIG_B43LEGACY_PCI_AUTOSELECT=y -CONFIG_B43LEGACY_PCICORE_AUTOSELECT=y -# CONFIG_B43LEGACY_PIO_MODE is not set -CONFIG_B43LEGACY_PIO=y -CONFIG_B43=m -CONFIG_B43_PCI_AUTOSELECT=y -CONFIG_B43_PCICORE_AUTOSELECT=y -CONFIG_B43_PHY_G=y -CONFIG_B43_PHY_HT=y -CONFIG_B43_PHY_LP=y -CONFIG_B43_PHY_N=y -CONFIG_B43_PIO=y -CONFIG_B43_SDIO=y -CONFIG_B43_SSB=y -CONFIG_B44=m -CONFIG_B44_PCI_AUTOSELECT=y -CONFIG_B44_PCICORE_AUTOSELECT=y -CONFIG_B44_PCI=y -CONFIG_B53=m -CONFIG_B53_MDIO_DRIVER=m -CONFIG_B53_MMAP_DRIVER=m -CONFIG_B53_SERDES=m -CONFIG_B53_SRAB_DRIVER=m -CONFIG_BACKLIGHT_ADP8870=m -CONFIG_BACKLIGHT_APPLE=m -CONFIG_BACKLIGHT_ARCXCNN=m -CONFIG_BACKLIGHT_BD6107=m -CONFIG_BACKLIGHT_GENERIC=m -CONFIG_BACKLIGHT_LM3639=m -CONFIG_BACKLIGHT_LV5207LP=m -CONFIG_BACKLIGHT_PM8941_WLED=m -CONFIG_BATMAN_ADV_BATMAN_V=y -CONFIG_BATMAN_ADV_BLA=y -CONFIG_BATMAN_ADV_DAT=y -# CONFIG_BATMAN_ADV_DEBUGFS is not set -# CONFIG_BATMAN_ADV_DEBUG is not set -CONFIG_BATMAN_ADV=m -# CONFIG_BATMAN_ADV_MCAST is not set -CONFIG_BATMAN_ADV_NC=y -CONFIG_BATMAN_ADV_SYSFS=y -# CONFIG_BATTERY_DA9150 is not set -# CONFIG_BATTERY_RT5033 is not set -# CONFIG_BCACHE_CLOSURES_DEBUG is not set -# CONFIG_BCACHE_DEBUG is not set -CONFIG_BCACHE=m -CONFIG_BCM7XXX_PHY=m -CONFIG_BCM87XX_PHY=m -CONFIG_BCMA_BLOCKIO=y -# CONFIG_BCMA_DEBUG is not set -# CONFIG_BCMA_DRIVER_GMAC_CMN is not set -CONFIG_BCMA_DRIVER_PCI=y -CONFIG_BCMA_HOST_PCI_POSSIBLE=y -CONFIG_BCMA_HOST_PCI=y -CONFIG_BCMA_HOST_SOC=y -CONFIG_BCMA=m -CONFIG_BCMA_SFLASH=y -CONFIG_BCMGENET=m -CONFIG_BCM_NET_PHYLIB=m -CONFIG_BE2ISCSI=m -CONFIG_BE2NET_BE2=y -CONFIG_BE2NET_BE3=y -CONFIG_BE2NET_HWMON=y -CONFIG_BE2NET_LANCER=y -CONFIG_BE2NET=m -CONFIG_BE2NET_SKYHAWK=y -# CONFIG_BFQ_CGROUP_DEBUG is not set 
-CONFIG_BFQ_GROUP_IOSCHED=y -CONFIG_BH1750=m -CONFIG_BH1780=m -CONFIG_BLK_CGROUP_IOCOST=y -CONFIG_BLK_CGROUP_IOLATENCY=y -CONFIG_BLK_CGROUP=y -CONFIG_BLK_DEV_3W_XXXX_RAID=m -CONFIG_BLK_DEV_BSGLIB=y -CONFIG_BLK_DEV_DM=m -CONFIG_BLK_DEV_INTEGRITY=y -CONFIG_BLK_DEV_MD=m -CONFIG_BLK_DEV_NVME=y -CONFIG_BLK_DEV_PCIESSD_MTIP32XX=m -CONFIG_BLK_DEV_PMEM=m -CONFIG_BLK_DEV_RAM_COUNT=16 -CONFIG_BLK_DEV_RAM=m -CONFIG_BLK_DEV_RAM_SIZE=4096 -CONFIG_BLK_DEV_RSXX=m -CONFIG_BLK_DEV_SX8=m -# CONFIG_BLK_DEV_THROTTLING_LOW is not set -CONFIG_BLK_DEV_THROTTLING=y -CONFIG_BLK_MQ_VIRTIO=y -CONFIG_BLK_RQ_ALLOC_TIME=y -CONFIG_BLK_WBT_MQ=y -CONFIG_BLK_WBT=y -# CONFIG_BMA180 is not set -# CONFIG_BMC150_ACCEL is not set -# CONFIG_BMC150_MAGN_I2C is not set -# CONFIG_BME680 is not set -# CONFIG_BMG160 is not set -# CONFIG_BMI160_I2C is not set -# CONFIG_BMP280 is not set -CONFIG_BNA=m -CONFIG_BNX2=m -CONFIG_BNX2X=m -CONFIG_BNX2X_SRIOV=y -CONFIG_BNXT_FLOWER_OFFLOAD=y -CONFIG_BNXT_HWMON=y -CONFIG_BNXT=m -CONFIG_BNXT_SRIOV=y -CONFIG_BONDING=m -CONFIG_BPFILTER_UMH=m -CONFIG_BPFILTER=y -# CONFIG_BPF_JIT_ALWAYS_ON is not set -CONFIG_BPF_JIT=y -# CONFIG_BPF_STREAM_PARSER is not set -CONFIG_BPF_SYSCALL=y -# CONFIG_BRCMDBG is not set -CONFIG_BRCMFMAC=m -CONFIG_BRCMFMAC_PCIE=y -CONFIG_BRCMFMAC_PROTO_BCDC=y -CONFIG_BRCMFMAC_PROTO_MSGBUF=y -CONFIG_BRCMFMAC_SDIO=y -CONFIG_BRCMFMAC_USB=y -CONFIG_BRCMSMAC=m -# CONFIG_BRCM_TRACING is not set -CONFIG_BRCMUTIL=m -CONFIG_BRIDGE_EBT_802_3=m -CONFIG_BRIDGE_EBT_AMONG=m -CONFIG_BRIDGE_EBT_ARP=m -CONFIG_BRIDGE_EBT_ARPREPLY=m -CONFIG_BRIDGE_EBT_BROUTE=m -CONFIG_BRIDGE_EBT_DNAT=m -CONFIG_BRIDGE_EBT_IP6=m -CONFIG_BRIDGE_EBT_IP=m -CONFIG_BRIDGE_EBT_LIMIT=m -CONFIG_BRIDGE_EBT_LOG=m -CONFIG_BRIDGE_EBT_MARK=m -CONFIG_BRIDGE_EBT_MARK_T=m -CONFIG_BRIDGE_EBT_NFLOG=m -CONFIG_BRIDGE_EBT_PKTTYPE=m -CONFIG_BRIDGE_EBT_REDIRECT=m -CONFIG_BRIDGE_EBT_SNAT=m -CONFIG_BRIDGE_EBT_STP=m -CONFIG_BRIDGE_EBT_T_FILTER=m -CONFIG_BRIDGE_EBT_T_NAT=m -CONFIG_BRIDGE_EBT_VLAN=m -CONFIG_BRIDGE_IGMP_SNOOPING=y -CONFIG_BRIDGE=m -CONFIG_BRIDGE_NETFILTER=m -CONFIG_BRIDGE_NF_EBTABLES=m -CONFIG_BRIDGE_VLAN_FILTERING=y -CONFIG_BROADCOM_PHY=m -# CONFIG_BSD_DISKLABEL is not set -# CONFIG_BSD_PROCESS_ACCT is not set -CONFIG_BT_ATH3K=m -CONFIG_BT_BCM=m -CONFIG_BT_BNEP=m -CONFIG_BT_BNEP_MC_FILTER=y -CONFIG_BT_BNEP_PROTO_FILTER=y -CONFIG_BT_BREDR=y -CONFIG_BT_DEBUGFS=y -CONFIG_BT_HCIBCM203X=m -CONFIG_BT_HCIBFUSB=m -CONFIG_BT_HCIBLUECARD=m -CONFIG_BT_HCIBPA10X=m -CONFIG_BT_HCIBT3C=m -# CONFIG_BT_HCIBTSDIO is not set -# CONFIG_BT_HCIBTUSB_AUTOSUSPEND is not set -CONFIG_BT_HCIBTUSB_BCM=y -CONFIG_BT_HCIBTUSB=m -CONFIG_BT_HCIBTUSB_MTK=y -CONFIG_BT_HCIBTUSB_RTL=y -CONFIG_BT_HCIDTL1=m -CONFIG_BT_HCIRSI=m -# CONFIG_BT_HCIUART is not set -# CONFIG_BT_HCIVHCI is not set -CONFIG_BT_HIDP=m -CONFIG_BT_HS=y -CONFIG_BT_INTEL=m -CONFIG_BT_LEDS=y -CONFIG_BT_LE=y -CONFIG_BT=m -CONFIG_BT_MRVL=m -CONFIG_BT_MRVL_SDIO=m -CONFIG_BT_MTKSDIO=m -CONFIG_BT_MTKUART=m -CONFIG_BTREE=y -CONFIG_BT_RFCOMM=m -CONFIG_BT_RFCOMM_TTY=y -# CONFIG_BTRFS_ASSERT is not set -# CONFIG_BTRFS_DEBUG is not set -# CONFIG_BTRFS_FS_CHECK_INTEGRITY is not set -CONFIG_BTRFS_FS=m -CONFIG_BTRFS_FS_POSIX_ACL=y -# CONFIG_BTRFS_FS_REF_VERIFY is not set -# CONFIG_BTRFS_FS_RUN_SANITY_TESTS is not set -CONFIG_BT_RTL=m -# CONFIG_BT_SELFTEST is not set -CONFIG_BTT=y -# CONFIG_CACHEFILES_DEBUG is not set -# CONFIG_CACHEFILES_HISTOGRAM is not set -CONFIG_CACHEFILES=m -CONFIG_CADENCE_WATCHDOG=m -CONFIG_CASSINI=m -CONFIG_CAVIUM_PTP=y -CONFIG_CB710_CORE=m -CONFIG_CB710_DEBUG_ASSUMPTIONS=y -# 
CONFIG_CB710_DEBUG is not set -# CONFIG_CC10001_ADC is not set -# CONFIG_CCS811 is not set -CONFIG_CDROM_PKTCDVD_BUFFERS=8 -CONFIG_CDROM_PKTCDVD=m -# CONFIG_CDROM_PKTCDVD_WCACHE is not set -CONFIG_CEC_CORE=m -CONFIG_CEC_NOTIFIER=y -# CONFIG_CFG80211_CERTIFICATION_ONUS is not set -CONFIG_CFG80211=m -CONFIG_CFG80211_WEXT_EXPORT=y -CONFIG_CFG80211_WEXT=y -CONFIG_CFS_BANDWIDTH=y -CONFIG_CGROUP_BPF=y -CONFIG_CGROUP_DEVICE=y -CONFIG_CGROUP_NET_CLASSID=y -CONFIG_CGROUP_PERF=y -CONFIG_CGROUP_PIDS=y -CONFIG_CGROUP_WRITEBACK=y -CONFIG_CHARGER_ISP1704=m -# CONFIG_CHARGER_MANAGER is not set -# CONFIG_CHARGER_MAX14577 is not set -# CONFIG_CHARGER_MAX77693 is not set -CONFIG_CHECKPOINT_RESTORE=y -CONFIG_CHELSIO_IPSEC_INLINE=y -CONFIG_CHELSIO_LIB=m -CONFIG_CHELSIO_T1_1G=y -CONFIG_CHELSIO_T1=m -CONFIG_CHELSIO_T3=m -CONFIG_CHELSIO_T4=m -CONFIG_CHELSIO_T4VF=m -CONFIG_CHR_DEV_SG=m -CONFIG_CHR_DEV_ST=m -CONFIG_CICADA_PHY=m -CONFIG_CIFS_ALLOW_INSECURE_LEGACY=y -# CONFIG_CIFS_DEBUG is not set -CONFIG_CIFS_DFS_UPCALL=y -# CONFIG_CIFS_FSCACHE is not set -CONFIG_CIFS=m -CONFIG_CIFS_POSIX=y -# CONFIG_CIFS_STATS2 is not set -CONFIG_CIFS_UPCALL=y -# CONFIG_CIFS_WEAK_PW_HASH is not set -CONFIG_CIFS_XATTR=y -CONFIG_CLEANCACHE=y -CONFIG_CLOCK_THERMAL=y -CONFIG_CLS_U32_MARK=y -CONFIG_CLS_U32_PERF=y -# CONFIG_CM32181 is not set -# CONFIG_CM3232 is not set -# CONFIG_CM3323 is not set -# CONFIG_CM36651 is not set -# CONFIG_CMDLINE_PARTITION is not set -CONFIG_CNIC=m -# CONFIG_COMEDI is not set -CONFIG_COMMON_CLK_SI5341=m -# CONFIG_COMPACTION is not set -CONFIG_COMPAL_LAPTOP=m -CONFIG_COMPAT_NETLINK_MESSAGES=y -CONFIG_CONFIGFS_FS=m -CONFIG_CORDIC=m -# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set -CONFIG_CORTINA_PHY=m -CONFIG_CPU5_WDT=m -CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL=y -# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set -CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m -CONFIG_CPU_FREQ_GOV_POWERSAVE=m -CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y -CONFIG_CPU_FREQ_GOV_USERSPACE=m -CONFIG_CPU_FREQ_STAT=y -CONFIG_CPU_IDLE_GOV_HALTPOLL=y -CONFIG_CPU_IDLE_GOV_LADDER=y -CONFIG_CPU_IDLE_GOV_TEO=y -# CONFIG_CRASH_DUMP is not set -CONFIG_CRC16=m -CONFIG_CRC64=m -CONFIG_CRC7=m -CONFIG_CRC8=m -CONFIG_CRC_CCITT=m -CONFIG_CRC_ITU_T=m -CONFIG_CRC_T10DIF=y -CONFIG_CRYPTO_842=m -CONFIG_CRYPTO_ADIANTUM=m -CONFIG_CRYPTO_AEGIS128_AESNI_SSE2=m -CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_AES_NI_INTEL=m -CONFIG_CRYPTO_AES_TI=m -CONFIG_CRYPTO_ARC4=m -CONFIG_CRYPTO_AUTHENC=m -CONFIG_CRYPTO_BLOWFISH_COMMON=m -CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_BLOWFISH_X86_64=m -CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64=m -CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64=m -CONFIG_CRYPTO_CAMELLIA=m -CONFIG_CRYPTO_CAMELLIA_X86_64=m -CONFIG_CRYPTO_CAST5_AVX_X86_64=m -CONFIG_CRYPTO_CAST5=m -CONFIG_CRYPTO_CAST6_AVX_X86_64=m -CONFIG_CRYPTO_CAST6=m -CONFIG_CRYPTO_CAST_COMMON=m -CONFIG_CRYPTO_CBC=m -CONFIG_CRYPTO_CCM=m -CONFIG_CRYPTO_CFB=m -CONFIG_CRYPTO_CHACHA20=m -CONFIG_CRYPTO_CHACHA20POLY1305=m -CONFIG_CRYPTO_CHACHA20_X86_64=m -CONFIG_CRYPTO_CMAC=m -CONFIG_CRYPTO_CRC32C_INTEL=m -CONFIG_CRYPTO_CRC32C=m -CONFIG_CRYPTO_CRC32=m -CONFIG_CRYPTO_CRC32_PCLMUL=m -CONFIG_CRYPTO_CRCT10DIF_PCLMUL=m -CONFIG_CRYPTO_CRCT10DIF=y -CONFIG_CRYPTO_CRYPTD=m -CONFIG_CRYPTO_CTS=m -CONFIG_CRYPTO_DEFLATE=m -CONFIG_CRYPTO_DES3_EDE_X86_64=m -CONFIG_CRYPTO_DES=m -CONFIG_CRYPTO_DEV_ATMEL_ECC=m -CONFIG_CRYPTO_DEV_ATMEL_I2C=m -CONFIG_CRYPTO_DEV_ATMEL_SHA204A=m -CONFIG_CRYPTO_DEV_CCP_CRYPTO=m -CONFIG_CRYPTO_DEV_CCP_DD=m -# CONFIG_CRYPTO_DEV_CCP_DEBUGFS is not set -CONFIG_CRYPTO_DEV_CCP=y -CONFIG_CRYPTO_DEV_CHELSIO=m 
-CONFIG_CRYPTO_DEV_CHELSIO_TLS=m -CONFIG_CRYPTO_DEV_NITROX_CNN55XX=m -CONFIG_CRYPTO_DEV_NITROX=m -CONFIG_CRYPTO_DEV_QAT_C3XXX=m -CONFIG_CRYPTO_DEV_QAT_C3XXXVF=m -CONFIG_CRYPTO_DEV_QAT_C62X=m -CONFIG_CRYPTO_DEV_QAT_C62XVF=m -CONFIG_CRYPTO_DEV_QAT_DH895xCC=m -CONFIG_CRYPTO_DEV_QAT_DH895xCCVF=m -CONFIG_CRYPTO_DEV_QAT=m -CONFIG_CRYPTO_DEV_SAFEXCEL=m -CONFIG_CRYPTO_DEV_SP_CCP=y -CONFIG_CRYPTO_DEV_SP_PSP=y -CONFIG_CRYPTO_DEV_VIRTIO=m -CONFIG_CRYPTO_DH=m -CONFIG_CRYPTO_DRBG_CTR=y -CONFIG_CRYPTO_DRBG_HASH=y -CONFIG_CRYPTO_ECB=m -CONFIG_CRYPTO_ECC=m -CONFIG_CRYPTO_ECDH=m -CONFIG_CRYPTO_ECHAINIV=m -CONFIG_CRYPTO_ECRDSA=m -CONFIG_CRYPTO_ENGINE=m -CONFIG_CRYPTO_ESSIV=m -CONFIG_CRYPTO_FCRYPT=m -CONFIG_CRYPTO_GLUE_HELPER_X86=m -CONFIG_CRYPTO_KEYWRAP=m -CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_KPP=m -CONFIG_CRYPTO_LIB_ARC4=m -CONFIG_CRYPTO_LIB_DES=m -CONFIG_CRYPTO_LRW=m -CONFIG_CRYPTO_LZ4HC=y -CONFIG_CRYPTO_LZ4=y -CONFIG_CRYPTO_LZO=y -CONFIG_CRYPTO_MD4=y -CONFIG_CRYPTO_MD5=m -CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_NHPOLY1305_AVX2=m -CONFIG_CRYPTO_NHPOLY1305=m -CONFIG_CRYPTO_NHPOLY1305_SSE2=m -CONFIG_CRYPTO_OFB=m -CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_PCRYPT=m -CONFIG_CRYPTO_POLY1305=m -CONFIG_CRYPTO_POLY1305_X86_64=m -CONFIG_CRYPTO_RMD128=m -CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_RMD256=m -CONFIG_CRYPTO_RMD320=m -CONFIG_CRYPTO_SALSA20=m -CONFIG_CRYPTO_SEED=m -CONFIG_CRYPTO_SERPENT_AVX2_X86_64=m -CONFIG_CRYPTO_SERPENT_AVX_X86_64=m -CONFIG_CRYPTO_SERPENT=m -CONFIG_CRYPTO_SERPENT_SSE2_X86_64=m -CONFIG_CRYPTO_SHA1=m -CONFIG_CRYPTO_SHA1_SSSE3=m -CONFIG_CRYPTO_SHA256_SSSE3=m -CONFIG_CRYPTO_SHA3=m -CONFIG_CRYPTO_SHA512=m -CONFIG_CRYPTO_SHA512_SSSE3=m -CONFIG_CRYPTO_SIMD=m -CONFIG_CRYPTO_SM3=m -CONFIG_CRYPTO_SM4=m -# CONFIG_CRYPTO_STATS is not set -CONFIG_CRYPTO_STREEBOG=m -CONFIG_CRYPTO_TEA=m -CONFIG_CRYPTO_TGR192=m -CONFIG_CRYPTO_TWOFISH_AVX_X86_64=m -CONFIG_CRYPTO_TWOFISH_COMMON=m -CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_TWOFISH_X86_64_3WAY=m -CONFIG_CRYPTO_TWOFISH_X86_64=m -CONFIG_CRYPTO_USER_API_AEAD=m -CONFIG_CRYPTO_USER_API_HASH=m -CONFIG_CRYPTO_USER_API=m -CONFIG_CRYPTO_USER_API_RNG=m -CONFIG_CRYPTO_USER_API_SKCIPHER=m -CONFIG_CRYPTO_USER=m -CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_WP512=m -CONFIG_CRYPTO_XCBC=m -CONFIG_CRYPTO_XTS=m -CONFIG_CRYPTO_XXHASH=m -CONFIG_CRYPTO_ZSTD=m -# CONFIG_CUSE is not set -CONFIG_CW1200=m -CONFIG_CW1200_WLAN_SDIO=m -CONFIG_CX_ECAT=m -CONFIG_CYPRESS_FIRMWARE=m -# CONFIG_DA280 is not set -# CONFIG_DA311 is not set -# CONFIG_DA9062_WATCHDOG is not set -# CONFIG_DA9063_WATCHDOG is not set -# CONFIG_DA9150_GPADC is not set -CONFIG_DAVICOM_PHY=m -CONFIG_DAX_DRIVER=y -CONFIG_DAX=y -CONFIG_DCA=m -CONFIG_DCDBAS=m -CONFIG_DE2104X_DSL=0 -CONFIG_DE2104X=m -CONFIG_DE4X5=m -# CONFIG_DEBUG_BOOT_PARAMS is not set -# CONFIG_DEBUG_DEVRES is not set -# CONFIG_DEBUG_KERNEL_DC is not set -# CONFIG_DEBUG_PINCTRL is not set -# CONFIG_DEBUG_PREEMPT is not set -CONFIG_DEBUG_RODATA_TEST=y -# CONFIG_DEBUG_RSEQ is not set -CONFIG_DEBUG_SECTION_MISMATCH=y -# CONFIG_DEBUG_STACK_USAGE is not set -CONFIG_DEFAULT_HOSTNAME="calculate" -CONFIG_DEFAULT_SECURITY_DAC=y -CONFIG_DELL_LAPTOP=m -CONFIG_DELL_RBTN=m -CONFIG_DELL_SMBIOS=m -# CONFIG_DELL_SMBIOS_SMM is not set -# CONFIG_DELL_SMBIOS_WMI is not set -CONFIG_DELL_SMO8800=m -CONFIG_DELL_WMI_AIO=m -CONFIG_DELL_WMI_DESCRIPTOR=m -CONFIG_DELL_WMI_LED=m -CONFIG_DELL_WMI=m -CONFIG_DEV_COREDUMP=y -# CONFIG_DEVFREQ_GOV_PASSIVE is not set -# CONFIG_DEVFREQ_GOV_PERFORMANCE is not set -# CONFIG_DEVFREQ_GOV_POWERSAVE is not set -CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=m -# 
CONFIG_DEVFREQ_GOV_USERSPACE is not set -CONFIG_DEVFREQ_THERMAL=y -CONFIG_DEVKMEM=y -CONFIG_DIMLIB=y -CONFIG_DL2K=m -# CONFIG_DLM_DEBUG is not set -CONFIG_DLM=m -# CONFIG_DLN2_ADC is not set -CONFIG_DM9102=m -CONFIG_DMA_ENGINE_RAID=y -# CONFIG_DMARD09 is not set -# CONFIG_DMARD10 is not set -CONFIG_DM_BIO_PRISON=m -CONFIG_DM_BUFIO=m -CONFIG_DM_CACHE=m -CONFIG_DM_CACHE_SMQ=m -CONFIG_DM_CLONE=m -CONFIG_DM_CRYPT=m -# CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING is not set -CONFIG_DM_ERA=m -CONFIG_DM_MIRROR=m -CONFIG_DM_PERSISTENT_DATA=m -CONFIG_DM_RAID=m -CONFIG_DM_SNAPSHOT=m -CONFIG_DM_THIN_PROVISIONING=m -CONFIG_DM_WRITECACHE=m -# CONFIG_DM_ZERO is not set -CONFIG_DNET=m -CONFIG_DP83822_PHY=m -CONFIG_DP83848_PHY=m -CONFIG_DP83867_PHY=m -CONFIG_DP83TC811_PHY=m -# CONFIG_DPS310 is not set -CONFIG_DRAGONRISE_FF=y -CONFIG_DRM_AMD_ACP=y -CONFIG_DRM_AMD_DC_DCN1_0=y -CONFIG_DRM_AMD_DC_DCN2_0=y -CONFIG_DRM_AMD_DC_DCN2_1=y -CONFIG_DRM_AMD_DC_DSC_SUPPORT=y -CONFIG_DRM_AMD_DC=y -CONFIG_DRM_AMDGPU_CIK=y -# CONFIG_DRM_AMDGPU_GART_DEBUGFS is not set -CONFIG_DRM_AMDGPU=m -CONFIG_DRM_AMDGPU_SI=y -CONFIG_DRM_AMDGPU_USERPTR=y -CONFIG_DRM_ANALOGIX_ANX78XX=m -CONFIG_DRM_ATI_PCIGART=y -CONFIG_DRM_DEBUG_SELFTEST=m -CONFIG_DRM_DP_AUX_CHARDEV=y -# CONFIG_DRM_FBDEV_LEAK_PHYS_SMEM is not set -CONFIG_DRM_GMA3600=y -CONFIG_DRM_GMA500=m -CONFIG_DRM_GMA600=y -CONFIG_DRM_I2C_CH7006=m -CONFIG_DRM_I2C_NXP_TDA9950=m -CONFIG_DRM_I2C_NXP_TDA998X=m -CONFIG_DRM_I2C_SIL164=m -CONFIG_DRM_I915_ALPHA_SUPPORT=y -# CONFIG_DRM_I915_DEBUG_GUC is not set -# CONFIG_DRM_I915_DEBUG is not set -# CONFIG_DRM_I915_DEBUG_MMIO is not set -# CONFIG_DRM_I915_DEBUG_RUNTIME_PM is not set -# CONFIG_DRM_I915_DEBUG_VBLANK_EVADE is not set -CONFIG_DRM_I915_FORCE_PROBE="*" -CONFIG_DRM_I915_GVT_KVMGT=m -CONFIG_DRM_I915_GVT=y -# CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS is not set -CONFIG_DRM_I915=m -# CONFIG_DRM_I915_SELFTEST is not set -# CONFIG_DRM_I915_SW_FENCE_CHECK_DAG is not set -# CONFIG_DRM_I915_SW_FENCE_DEBUG_OBJECTS is not set -# CONFIG_DRM_I915_WERROR is not set -CONFIG_DRM_KMS_HELPER=m -CONFIG_DRM_LEGACY=y -CONFIG_DRM_LIB_RANDOM=y -CONFIG_DRM=m -CONFIG_DRM_MGA=m -CONFIG_DRM_NOUVEAU_BACKLIGHT=y -CONFIG_DRM_NOUVEAU=m -CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN=m -CONFIG_DRM_R128=m -CONFIG_DRM_RADEON=m -CONFIG_DRM_RADEON_USERPTR=y -CONFIG_DRM_SAVAGE=m -CONFIG_DRM_SCHED=m -CONFIG_DRM_SIS=m -CONFIG_DRM_TDFX=m -CONFIG_DRM_TTM=m -CONFIG_DRM_UDL=m -CONFIG_DRM_VGEM=m -CONFIG_DRM_VIA=m -CONFIG_DRM_VIRTIO_GPU=m -CONFIG_DRM_VKMS=m -CONFIG_DRM_VM=y -# CONFIG_DS1803 is not set -# CONFIG_DS4424 is not set -CONFIG_DUMMY=m -CONFIG_DVB_A8293=m -CONFIG_DVB_AF9013=m -CONFIG_DVB_AF9033=m -CONFIG_DVB_AS102_FE=m -CONFIG_DVB_AS102=m -CONFIG_DVB_ASCOT2E=m -CONFIG_DVB_ATBM8830=m -CONFIG_DVB_AU8522_DTV=m -CONFIG_DVB_AU8522=m -CONFIG_DVB_AU8522_V4L=m -CONFIG_DVB_AV7110_IR=y -CONFIG_DVB_AV7110=m -CONFIG_DVB_AV7110_OSD=y -CONFIG_DVB_B2C2_FLEXCOP=m -# CONFIG_DVB_B2C2_FLEXCOP_PCI_DEBUG is not set -CONFIG_DVB_B2C2_FLEXCOP_PCI=m -# CONFIG_DVB_B2C2_FLEXCOP_USB_DEBUG is not set -CONFIG_DVB_B2C2_FLEXCOP_USB=m -CONFIG_DVB_BCM3510=m -CONFIG_DVB_BUDGET_AV=m -CONFIG_DVB_BUDGET_CI=m -CONFIG_DVB_BUDGET_CORE=m -CONFIG_DVB_BUDGET=m -CONFIG_DVB_BUDGET_PATCH=m -CONFIG_DVB_CORE=m -CONFIG_DVB_CX22700=m -CONFIG_DVB_CX22702=m -CONFIG_DVB_CX24110=m -CONFIG_DVB_CX24116=m -CONFIG_DVB_CX24117=m -CONFIG_DVB_CX24120=m -CONFIG_DVB_CX24123=m -CONFIG_DVB_CXD2099=m -CONFIG_DVB_CXD2820R=m -CONFIG_DVB_CXD2841ER=m -CONFIG_DVB_DDBRIDGE=m -# CONFIG_DVB_DDBRIDGE_MSIENABLE is not set -# CONFIG_DVB_DEMUX_SECTION_LOSS_LOG 
is not set -CONFIG_DVB_DIB3000MB=m -CONFIG_DVB_DIB3000MC=m -CONFIG_DVB_DIB7000M=m -CONFIG_DVB_DIB7000P=m -CONFIG_DVB_DIB8000=m -CONFIG_DVB_DIB9000=m -CONFIG_DVB_DM1105=m -CONFIG_DVB_DRX39XYJ=m -CONFIG_DVB_DRXD=m -CONFIG_DVB_DRXK=m -CONFIG_DVB_DS3000=m -CONFIG_DVB_DUMMY_FE=m -# CONFIG_DVB_DYNAMIC_MINORS is not set -CONFIG_DVB_EC100=m -# CONFIG_DVB_FIREDTV is not set -CONFIG_DVB_GP8PSK_FE=m -CONFIG_DVB_HELENE=m -CONFIG_DVB_HOPPER=m -CONFIG_DVB_HORUS3A=m -CONFIG_DVB_ISL6405=m -CONFIG_DVB_ISL6421=m -CONFIG_DVB_ISL6423=m -CONFIG_DVB_IX2505V=m -CONFIG_DVB_L64781=m -CONFIG_DVB_LG2160=m -CONFIG_DVB_LGDT3305=m -CONFIG_DVB_LGDT3306A=m -CONFIG_DVB_LGDT330X=m -CONFIG_DVB_LGS8GL5=m -CONFIG_DVB_LGS8GXX=m -CONFIG_DVB_LNBH25=m -CONFIG_DVB_LNBH29=m -CONFIG_DVB_LNBP21=m -CONFIG_DVB_LNBP22=m -CONFIG_DVB_M88DS3103=m -CONFIG_DVB_M88RS2000=m -CONFIG_DVB_MANTIS=m -CONFIG_DVB_MAX_ADAPTERS=8 -CONFIG_DVB_MB86A16=m -CONFIG_DVB_MB86A20S=m -# CONFIG_DVB_MMAP is not set -CONFIG_DVB_MN88443X=m -CONFIG_DVB_MN88472=m -CONFIG_DVB_MN88473=m -CONFIG_DVB_MT312=m -CONFIG_DVB_MT352=m -CONFIG_DVB_MXL5XX=m -CONFIG_DVB_NET=y -CONFIG_DVB_NGENE=m -CONFIG_DVB_NXT200X=m -CONFIG_DVB_NXT6000=m -CONFIG_DVB_OR51132=m -CONFIG_DVB_OR51211=m -CONFIG_DVB_PLATFORM_DRIVERS=y -CONFIG_DVB_PLL=m -CONFIG_DVB_PLUTO2=m -CONFIG_DVB_PT1=m -CONFIG_DVB_PT3=m -CONFIG_DVB_RTL2830=m -CONFIG_DVB_RTL2832=m -CONFIG_DVB_RTL2832_SDR=m -CONFIG_DVB_S5H1409=m -CONFIG_DVB_S5H1411=m -CONFIG_DVB_S5H1420=m -CONFIG_DVB_S5H1432=m -CONFIG_DVB_S921=m -CONFIG_DVB_SI2165=m -CONFIG_DVB_SI2168=m -CONFIG_DVB_SI21XX=m -CONFIG_DVB_SMIPCIE=m -CONFIG_DVB_SP2=m -CONFIG_DVB_SP8870=m -CONFIG_DVB_SP887X=m -CONFIG_DVB_STB0899=m -CONFIG_DVB_STB6000=m -CONFIG_DVB_STB6100=m -CONFIG_DVB_STV0288=m -CONFIG_DVB_STV0297=m -CONFIG_DVB_STV0299=m -CONFIG_DVB_STV0367=m -CONFIG_DVB_STV0900=m -CONFIG_DVB_STV090x=m -CONFIG_DVB_STV0910=m -CONFIG_DVB_STV6110=m -CONFIG_DVB_STV6110x=m -CONFIG_DVB_STV6111=m -CONFIG_DVB_TC90522=m -CONFIG_DVB_TDA10021=m -CONFIG_DVB_TDA10023=m -CONFIG_DVB_TDA10048=m -CONFIG_DVB_TDA1004X=m -CONFIG_DVB_TDA10071=m -CONFIG_DVB_TDA10086=m -CONFIG_DVB_TDA18271C2DD=m -CONFIG_DVB_TDA665x=m -CONFIG_DVB_TDA8083=m -CONFIG_DVB_TDA8261=m -CONFIG_DVB_TDA826X=m -CONFIG_DVB_TS2020=m -CONFIG_DVB_TTUSB_BUDGET=m -CONFIG_DVB_TTUSB_DEC=m -CONFIG_DVB_TUA6100=m -CONFIG_DVB_TUNER_CX24113=m -CONFIG_DVB_TUNER_DIB0070=m -CONFIG_DVB_TUNER_DIB0090=m -CONFIG_DVB_TUNER_ITD1000=m -# CONFIG_DVB_ULE_DEBUG is not set -CONFIG_DVB_USB_A800=m -CONFIG_DVB_USB_AF9005=m -CONFIG_DVB_USB_AF9005_REMOTE=m -CONFIG_DVB_USB_AF9015=m -CONFIG_DVB_USB_AF9035=m -CONFIG_DVB_USB_ANYSEE=m -CONFIG_DVB_USB_AU6610=m -CONFIG_DVB_USB_AZ6007=m -CONFIG_DVB_USB_AZ6027=m -CONFIG_DVB_USB_CE6230=m -CONFIG_DVB_USB_CINERGY_T2=m -# CONFIG_DVB_USB_CXUSB_ANALOG is not set -CONFIG_DVB_USB_CXUSB=m -# CONFIG_DVB_USB_DEBUG is not set -CONFIG_DVB_USB_DIB0700=m -CONFIG_DVB_USB_DIB3000MC=m -# CONFIG_DVB_USB_DIBUSB_MB_FAULTY is not set -CONFIG_DVB_USB_DIBUSB_MB=m -CONFIG_DVB_USB_DIBUSB_MC=m -CONFIG_DVB_USB_DIGITV=m -CONFIG_DVB_USB_DTT200U=m -CONFIG_DVB_USB_DTV5100=m -CONFIG_DVB_USB_DVBSKY=m -CONFIG_DVB_USB_DW2102=m -CONFIG_DVB_USB_EC168=m -CONFIG_DVB_USB_GL861=m -CONFIG_DVB_USB_GP8PSK=m -CONFIG_DVB_USB_LME2510=m -CONFIG_DVB_USB=m -CONFIG_DVB_USB_M920X=m -CONFIG_DVB_USB_MXL111SF=m -CONFIG_DVB_USB_NOVA_T_USB2=m -CONFIG_DVB_USB_OPERA1=m -CONFIG_DVB_USB_PCTV452E=m -CONFIG_DVB_USB_RTL28XXU=m -CONFIG_DVB_USB_TECHNISAT_USB2=m -CONFIG_DVB_USB_TTUSB2=m -CONFIG_DVB_USB_UMT_010=m -CONFIG_DVB_USB_V2=m -CONFIG_DVB_USB_VP702X=m -CONFIG_DVB_USB_VP7045=m 
-CONFIG_DVB_USB_ZD1301=m -CONFIG_DVB_VES1820=m -CONFIG_DVB_VES1X93=m -CONFIG_DVB_ZD1301_DEMOD=m -CONFIG_DVB_ZL10036=m -CONFIG_DVB_ZL10039=m -CONFIG_DVB_ZL10353=m -CONFIG_DWC_XLGMAC=m -CONFIG_DWC_XLGMAC_PCI=m -CONFIG_DW_DMAC=m -CONFIG_DW_DMAC_PCI=y -CONFIG_DW_EDMA=m -CONFIG_DW_EDMA_PCIE=m -CONFIG_DWMAC_GENERIC=m -CONFIG_DW_WATCHDOG=m -CONFIG_E1000E=m -CONFIG_E1000=m -CONFIG_E100=m -# CONFIG_EARLY_PRINTK_DBGP is not set -CONFIG_EBC_C384_WDT=m -CONFIG_ECRYPT_FS=m -# CONFIG_ECRYPT_FS_MESSAGING is not set -# CONFIG_EDAC is not set -CONFIG_EEEPC_LAPTOP=m -CONFIG_EEEPC_WMI=m -CONFIG_EEPROM_93CX6=m -CONFIG_EEPROM_IDT_89HPESX=m -CONFIG_EFI_FAKE_MEMMAP=y -CONFIG_EFI_MAX_FAKE_MEM=8 -CONFIG_EFI_PGT_DUMP=y -CONFIG_EFI_VARS=m -# CONFIG_ENABLE_MUST_CHECK is not set -CONFIG_ENA_ETHERNET=m -CONFIG_ENCLOSURE_SERVICES=m -CONFIG_ENCRYPTED_KEYS=m -CONFIG_ENERGY_MODEL=y -CONFIG_ENIC=m -CONFIG_EPIC100=m -CONFIG_EROFS_FS_CLUSTER_PAGE_LIMIT=1 -# CONFIG_EROFS_FS_DEBUG is not set -CONFIG_EROFS_FS=m -CONFIG_EROFS_FS_POSIX_ACL=y -CONFIG_EROFS_FS_SECURITY=y -CONFIG_EROFS_FS_XATTR=y -CONFIG_EROFS_FS_ZIP=y -CONFIG_ETHOC=m -CONFIG_EUROTECH_WDT=m -# CONFIG_EXFAT_FS is not set -CONFIG_EXPERT=y -CONFIG_EXT2_FS=m -CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT3_FS=m -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y -CONFIG_EXT4_FS=m -CONFIG_EXTCON_ADC_JACK=m -CONFIG_EXTCON_AXP288=m -CONFIG_EXTCON_FSA9480=m -CONFIG_EXTCON_MAX14577=m -CONFIG_EXTCON_MAX77693=m -CONFIG_EXTCON_RT8973A=m -CONFIG_EXTCON_SM5502=m -CONFIG_EXTCON=y -# CONFIG_F2FS_CHECK_FS is not set -# CONFIG_F2FS_FAULT_INJECTION is not set -CONFIG_F2FS_FS=m -CONFIG_F2FS_FS_POSIX_ACL=y -CONFIG_F2FS_FS_SECURITY=y -CONFIG_F2FS_FS_XATTR=y -CONFIG_F2FS_STAT_FS=y -CONFIG_FAILOVER=m -CONFIG_FANOTIFY=y -CONFIG_FAT_DEFAULT_CODEPAGE=866 -CONFIG_FAT_DEFAULT_IOCHARSET="utf8" -CONFIG_FAT_DEFAULT_UTF8=y -CONFIG_FAT_FS=m -CONFIG_FB_BOOT_VESA_SUPPORT=y -CONFIG_FB_HYPERV=m -# CONFIG_FB_INTEL is not set -CONFIG_FB_SIMPLE=y -# CONFIG_FB_SM750 is not set -CONFIG_FB_SYS_COPYAREA=m -CONFIG_FB_SYS_FILLRECT=m -CONFIG_FB_SYS_FOPS=m -CONFIG_FB_SYS_IMAGEBLIT=m -# CONFIG_FB_TILEBLITTING is not set -CONFIG_FB_VESA=y -CONFIG_FCOE_FNIC=m -CONFIG_FCOE=m -# CONFIG_FDDI is not set -CONFIG_FEALNX=m -# CONFIG_FIELDBUS_DEV is not set -CONFIG_FIREWIRE=m -# CONFIG_FIREWIRE_NET is not set -CONFIG_FIREWIRE_OHCI=m -CONFIG_FIREWIRE_SBP2=m -# CONFIG_FIREWIRE_SERIAL is not set -CONFIG_FIRMWARE_EDID=y -CONFIG_FIXED_PHY=m -CONFIG_FM10K=m -CONFIG_FORCEDETH=m -CONFIG_FRAME_VECTOR=y -CONFIG_FRAME_WARN=1024 -CONFIG_FRONTSWAP=y -# CONFIG_FSCACHE_DEBUG is not set -# CONFIG_FSCACHE_HISTOGRAM is not set -CONFIG_FSCACHE=m -# CONFIG_FSCACHE_OBJECT_LIST is not set -CONFIG_FSCACHE_STATS=y -CONFIG_FS_DAX=y -CONFIG_FS_MBCACHE=m -# CONFIG_FTRACE is not set -CONFIG_FUJITSU_ES=m -CONFIG_FUJITSU_LAPTOP=m -CONFIG_FUJITSU_TABLET=m -CONFIG_FUSE_FS=m -CONFIG_FUSION_CTL=m -CONFIG_FUSION_FC=m -# CONFIG_FUSION_LOGGING is not set -CONFIG_FUSION_MAX_SGE=128 -CONFIG_FUSION_SAS=m -CONFIG_FUSION_SPI=m -CONFIG_FUSION=y -CONFIG_FW_CFG_SYSFS_CMDLINE=y -CONFIG_FW_CFG_SYSFS=m -CONFIG_FW_LOADER_PAGED_BUF=y -CONFIG_FW_LOADER_USER_HELPER_FALLBACK=y -CONFIG_FW_LOADER_USER_HELPER=y -# CONFIG_FXAS21002C is not set -CONFIG_GACT_PROB=y -CONFIG_GAMEPORT_EMU10K1=m -# CONFIG_GAMEPORT_FM801 is not set -# CONFIG_GAMEPORT_L4 is not set -CONFIG_GAMEPORT=m -# CONFIG_GAMEPORT_NS558 is not set -CONFIG_GART_IOMMU=y -# CONFIG_GCC_PLUGIN_CYC_COMPLEXITY is not set -# CONFIG_GENERIC_ADC_BATTERY is not set 
-CONFIG_GENERIC_ADC_THERMAL=m -CONFIG_GENERIC_PHY=y -CONFIG_GENEVE=m -CONFIG_GENWQE=m -CONFIG_GENWQE_PLATFORM_ERROR_RECOVERY=0 -CONFIG_GFS2_FS_LOCKING_DLM=y -CONFIG_GFS2_FS=m -# CONFIG_GP2AP020A00F is not set -CONFIG_GPD_POCKET_FAN=m -CONFIG_GRACE_PERIOD=m -CONFIG_GREENASIA_FF=y -# CONFIG_GS_FPGABOOT is not set -CONFIG_GTP=m -CONFIG_GVE=m -CONFIG_HABANA_AI=m -CONFIG_HALTPOLL_CPUIDLE=y -CONFIG_HAMACHI=m -# CONFIG_HAMRADIO is not set -CONFIG_HAPPYMEAL=m -CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT=y -CONFIG_HAVE_KVM_EVENTFD=y -CONFIG_HAVE_KVM_IRQ_BYPASS=y -CONFIG_HAVE_KVM_IRQCHIP=y -CONFIG_HAVE_KVM_IRQFD=y -CONFIG_HAVE_KVM_IRQ_ROUTING=y -CONFIG_HAVE_KVM_MSI=y -CONFIG_HAVE_KVM_NO_POLL=y -CONFIG_HAVE_RCU_TABLE_FREE=y -# CONFIG_HDC100X is not set -CONFIG_HDMI_LPE_AUDIO=m -CONFIG_HERMES_CACHE_FW_ON_INIT=y -CONFIG_HERMES=m -CONFIG_HERMES_PRISM=y -CONFIG_HFSPLUS_FS=m -CONFIG_HID_ACCUTOUCH=m -CONFIG_HID_ACRUX_FF=y -CONFIG_HID_ACRUX=m -CONFIG_HID_ALPS=m -CONFIG_HID_ASUS=m -CONFIG_HID_CMEDIA=m -CONFIG_HID_CORSAIR=y -CONFIG_HID_COUGAR=m -CONFIG_HID_CREATIVE_SB0540=m -CONFIG_HID_DRAGONRISE=m -CONFIG_HID_ELAN=m -CONFIG_HID_ELECOM=m -CONFIG_HID_ELO=m -CONFIG_HID_EMS_FF=m -CONFIG_HID_GFRM=m -CONFIG_HID_GREENASIA=m -CONFIG_HID_GT683R=m -CONFIG_HID_GYRATION=m -CONFIG_HID_HOLTEK=m -CONFIG_HID_HYPERV_MOUSE=m -CONFIG_HID_ICADE=m -CONFIG_HID_ITE=m -CONFIG_HID_KEYTOUCH=m -CONFIG_HID_KYE=y -CONFIG_HID_LCPOWER=m -CONFIG_HID_LED=m -CONFIG_HID_LENOVO=m -CONFIG_HID_LOGITECH_DJ=m -CONFIG_HID_LOGITECH_HIDPP=m -CONFIG_HID_MACALLY=m -CONFIG_HID_MAGICMOUSE=m -CONFIG_HID_MALTRON=m -CONFIG_HID_MAYFLASH=m -CONFIG_HID_MULTITOUCH=m -CONFIG_HID_NTI=m -CONFIG_HID_NTRIG=m -CONFIG_HID_ORTEK=m -CONFIG_HID_PANTHERLORD=m -CONFIG_HID_PENMOUNT=m -CONFIG_HID_PETALYNX=m -CONFIG_HID_PICOLCD_BACKLIGHT=y -CONFIG_HID_PICOLCD_CIR=y -CONFIG_HID_PICOLCD_FB=y -CONFIG_HID_PICOLCD_LCD=y -CONFIG_HID_PICOLCD_LEDS=y -CONFIG_HID_PICOLCD=m -CONFIG_HID_PLANTRONICS=y -CONFIG_HID_PRIMAX=m -CONFIG_HID_PRODIKEYS=m -CONFIG_HID_REDRAGON=m -CONFIG_HID_RETRODE=m -CONFIG_HID_RMI=m -CONFIG_HID_ROCCAT=m -CONFIG_HID_SAITEK=m -CONFIG_HID_SAMSUNG=m -# CONFIG_HID_SENSOR_ACCEL_3D is not set -# CONFIG_HID_SENSOR_ALS is not set -CONFIG_HID_SENSOR_CUSTOM_SENSOR=m -# CONFIG_HID_SENSOR_DEVICE_ROTATION is not set -# CONFIG_HID_SENSOR_GYRO_3D is not set -CONFIG_HID_SENSOR_HUB=m -# CONFIG_HID_SENSOR_HUMIDITY is not set -CONFIG_HID_SENSOR_IIO_COMMON=m -CONFIG_HID_SENSOR_IIO_TRIGGER=m -# CONFIG_HID_SENSOR_INCLINOMETER_3D is not set -# CONFIG_HID_SENSOR_MAGNETOMETER_3D is not set -# CONFIG_HID_SENSOR_PRESS is not set -# CONFIG_HID_SENSOR_PROX is not set -# CONFIG_HID_SENSOR_TEMP is not set -CONFIG_HID_SMARTJOYPLUS=y -CONFIG_HID_SONY=m -CONFIG_HID_SPEEDLINK=m -CONFIG_HID_STEAM=m -CONFIG_HID_STEELSERIES=m -CONFIG_HID_SUNPLUS=m -CONFIG_HID_THINGM=m -CONFIG_HID_THRUSTMASTER=y -CONFIG_HID_TIVO=m -CONFIG_HID_TWINHAN=m -CONFIG_HID_U2FZERO=m -CONFIG_HID_UCLOGIC=m -CONFIG_HID_UDRAW_PS3=m -CONFIG_HID_VIEWSONIC=m -CONFIG_HID_WACOM=m -CONFIG_HID_WALTOP=m -CONFIG_HID_WIIMOTE=m -CONFIG_HID_XINMO=m -CONFIG_HID_ZEROPLUS=y -CONFIG_HID_ZYDACRON=m -CONFIG_HMM_MIRROR=y -CONFIG_HOLTEK_FF=y -CONFIG_HOSTAP_CS=m -CONFIG_HOSTAP_FIRMWARE_NVRAM=y -CONFIG_HOSTAP_FIRMWARE=y -CONFIG_HOSTAP=m -CONFIG_HOSTAP_PCI=m -CONFIG_HOSTAP_PLX=m -CONFIG_HOTPLUG_PCI_ACPI_IBM=m -CONFIG_HOTPLUG_PCI_ACPI=y -CONFIG_HOTPLUG_PCI_CPCI_GENERIC=m -CONFIG_HOTPLUG_PCI_CPCI=y -CONFIG_HOTPLUG_PCI_CPCI_ZT5550=m -CONFIG_HOTPLUG_PCI_SHPC=y -# CONFIG_HP03 is not set -CONFIG_HP100=m -# CONFIG_HP206C is not set -CONFIG_HP_ACCEL=m -# CONFIG_HPET is 
not set -CONFIG_HP_WATCHDOG=m -# CONFIG_HPWDT_NMI_DECODING is not set -CONFIG_HP_WIRELESS=m -CONFIG_HP_WMI=m -CONFIG_HSA_AMD=y -CONFIG_HSR=m -# CONFIG_HTS221 is not set -# CONFIG_HTU21 is not set -# CONFIG_HUAWEI_WMI is not set -# CONFIG_HUGETLBFS is not set -CONFIG_HVC_DRIVER=y -CONFIG_HWMON=m -CONFIG_HWMON_VID=m -CONFIG_HW_RANDOM_AMD=m -CONFIG_HW_RANDOM_INTEL=m -CONFIG_HW_RANDOM=m -CONFIG_HW_RANDOM_VIA=m -# CONFIG_HW_RANDOM_VIRTIO is not set -CONFIG_HYPERV_BALLOON=m -CONFIG_HYPERV_IOMMU=y -CONFIG_HYPERVISOR_GUEST=y -CONFIG_HYPERV_KEYBOARD=m -CONFIG_HYPERV=m -CONFIG_HYPERV_NET=m -CONFIG_HYPERV_STORAGE=m -CONFIG_HYPERV_TIMER=y -CONFIG_HYPERV_UTILS=m -CONFIG_HYPERV_VSOCKETS=m -CONFIG_I2C_ALGOBIT=m -CONFIG_I2C_ALGOPCA=m -CONFIG_I2C_ALI1535=m -CONFIG_I2C_ALI1563=m -CONFIG_I2C_ALI15X3=m -CONFIG_I2C_AMD756=m -CONFIG_I2C_AMD756_S4882=m -CONFIG_I2C_AMD8111=m -CONFIG_I2C_AMD_MP2=m -CONFIG_I2C_CHARDEV=m -CONFIG_I2C_DESIGNWARE_BAYTRAIL=y -CONFIG_I2C_DESIGNWARE_CORE=m -CONFIG_I2C_DESIGNWARE_PCI=m -CONFIG_I2C_DESIGNWARE_PLATFORM=m -# CONFIG_I2C_DESIGNWARE_SLAVE is not set -CONFIG_I2C_DIOLAN_U2C=m -CONFIG_I2C_DLN2=m -CONFIG_I2C_EMEV2=m -CONFIG_I2C_HID=m -CONFIG_I2C_I801=m -CONFIG_I2C_ISCH=m -CONFIG_I2C_ISMT=m -CONFIG_I2C=m -CONFIG_I2C_MLXCPLD=m -# CONFIG_I2C_MUX_LTC4306 is not set -CONFIG_I2C_MUX=m -CONFIG_I2C_MUX_MLXCPLD=m -CONFIG_I2C_MUX_PCA9541=m -CONFIG_I2C_MUX_REG=m -CONFIG_I2C_NFORCE2=m -CONFIG_I2C_NFORCE2_S4985=m -CONFIG_I2C_NVIDIA_GPU=m -CONFIG_I2C_OCORES=m -CONFIG_I2C_PARPORT_LIGHT=m -CONFIG_I2C_PARPORT=m -CONFIG_I2C_PCA_PLATFORM=m -CONFIG_I2C_PIIX4=m -CONFIG_I2C_ROBOTFUZZ_OSIF=m -CONFIG_I2C_SCMI=m -CONFIG_I2C_SIMTEC=m -CONFIG_I2C_SIS5595=m -CONFIG_I2C_SIS630=m -CONFIG_I2C_SIS96X=m -CONFIG_I2C_SLAVE_EEPROM=m -CONFIG_I2C_SLAVE=y -CONFIG_I2C_SMBUS=m -CONFIG_I2C_STUB=m -CONFIG_I2C_TAOS_EVM=m -CONFIG_I2C_TINY_USB=m -CONFIG_I2C_VIA=m -CONFIG_I2C_VIAPRO=m -CONFIG_I2C_VIPERBOARD=m -CONFIG_I2C_XILINX=m -CONFIG_I40E=m -CONFIG_I40EVF=m -CONFIG_I6300ESB_WDT=m -CONFIG_I82092=m -CONFIG_I8K=m -# CONFIG_IAQCORE is not set -CONFIG_IAVF=m -CONFIG_IB700_WDT=m -CONFIG_IBM_ASM=m -CONFIG_IBMASR=m -CONFIG_IBM_RTL=m -CONFIG_ICE=m -CONFIG_ICPLUS_PHY=m -CONFIG_IDEAPAD_LAPTOP=m -CONFIG_IE6XX_WDT=m -CONFIG_IFB=m -CONFIG_IGB_DCA=y -CONFIG_IGB_HWMON=y -CONFIG_IGB=m -CONFIG_IGBVF=m -CONFIG_IGC=m -CONFIG_IIO_BUFFER_CB=m -# CONFIG_IIO_BUFFER_HW_CONSUMER is not set -CONFIG_IIO_BUFFER=y -CONFIG_IIO_CONFIGFS=m -CONFIG_IIO_CONSUMERS_PER_TRIGGER=2 -# CONFIG_IIO_HRTIMER_TRIGGER is not set -CONFIG_IIO_INTERRUPT_TRIGGER=m -CONFIG_IIO_KFIFO_BUF=m -CONFIG_IIO=m -# CONFIG_IIO_SIMPLE_DUMMY is not set -# CONFIG_IIO_ST_ACCEL_3AXIS is not set -# CONFIG_IIO_ST_GYRO_3AXIS is not set -# CONFIG_IIO_ST_LSM6DSX is not set -# CONFIG_IIO_ST_MAGN_3AXIS is not set -# CONFIG_IIO_ST_PRESS is not set -CONFIG_IIO_SW_DEVICE=m -CONFIG_IIO_SW_TRIGGER=m -CONFIG_IIO_SYSFS_TRIGGER=m -CONFIG_IIO_TIGHTLOOP_TRIGGER=m -CONFIG_IIO_TRIGGERED_BUFFER=m -CONFIG_IIO_TRIGGER=y -CONFIG_IKCONFIG_PROC=y -CONFIG_IKCONFIG=y -# CONFIG_INA2XX_ADC is not set -CONFIG_INET6_AH=m -CONFIG_INET6_ESP=m -CONFIG_INET6_ESP_OFFLOAD=m -CONFIG_INET6_IPCOMP=m -CONFIG_INET6_TUNNEL=m -CONFIG_INET6_XFRM_TUNNEL=m -CONFIG_INET_AH=m -# CONFIG_INET_DIAG_DESTROY is not set -CONFIG_INET_DIAG=m -CONFIG_INET_ESP=m -CONFIG_INET_ESP_OFFLOAD=m -CONFIG_INET_IPCOMP=m -CONFIG_INET_RAW_DIAG=m -CONFIG_INET_SCTP_DIAG=m -CONFIG_INET_TCP_DIAG=m -CONFIG_INET_TUNNEL=m -CONFIG_INET_UDP_DIAG=m -CONFIG_INET_XFRM_TUNNEL=m -CONFIG_INPUT_88PM80X_ONKEY=m -# CONFIG_INPUT_AXP20X_PEK is not set -CONFIG_INPUT_BMA150=m -# 
CONFIG_INPUT_DA9063_ONKEY is not set -CONFIG_INPUT_JOYDEV=m -CONFIG_INPUT_MATRIXKMAP=m -CONFIG_INPUT_MOUSEDEV_PSAUX=y -CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 -CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 -CONFIG_INPUT_MOUSEDEV=y -CONFIG_INPUT_PCSPKR=m -CONFIG_INPUT_POLLDEV=m -# CONFIG_INPUT_REGULATOR_HAPTIC is not set -# CONFIG_INPUT_RETU_PWRBUTTON is not set -CONFIG_INPUT_SPARSEKMAP=m -CONFIG_INPUT_UINPUT=m -CONFIG_INPUT_YEALINK=m -CONFIG_INT3406_THERMAL=m -CONFIG_INT340X_THERMAL=m -CONFIG_INTEL_BXT_PMIC_THERMAL=m -CONFIG_INTEL_BXTWC_PMIC_TMU=m -CONFIG_INTEL_GTT=m -CONFIG_INTEL_HID_EVENT=m -CONFIG_INTEL_IDLE=y -CONFIG_INTEL_IDMA64=m -CONFIG_INTEL_IOATDMA=m -CONFIG_INTEL_IOMMU_DEFAULT_ON=y -CONFIG_INTEL_IOMMU_SVM=y -CONFIG_INTEL_IPS=m -CONFIG_INTEL_ISH_FIRMWARE_DOWNLOADER=m -CONFIG_INTEL_ISH_HID=m -CONFIG_INTEL_MEI_HDCP=m -CONFIG_INTEL_MEI=m -CONFIG_INTEL_MEI_ME=m -CONFIG_INTEL_MEI_TXE=m -CONFIG_INTEL_MEI_WDT=m -CONFIG_INTEL_MENLOW=m -CONFIG_INTEL_MIC_BUS=m -# CONFIG_INTEL_MIC_X100_DMA is not set -CONFIG_INTEL_OAKTRAIL=m -CONFIG_INTEL_PCH_THERMAL=m -CONFIG_INTEL_PMC_CORE=y -CONFIG_INTEL_PMC_IPC=m -CONFIG_INTEL_POWERCLAMP=m -CONFIG_INTEL_PUNIT_IPC=m -CONFIG_INTEL_RST=m -CONFIG_INTEL_SMARTCONNECT=m -CONFIG_INTEL_SOC_DTS_IOSF_CORE=m -CONFIG_INTEL_SOC_DTS_THERMAL=m -CONFIG_INTEL_SOC_PMIC_BXTWC=m -CONFIG_INTEL_SPEED_SELECT_INTERFACE=m -CONFIG_INTEL_TELEMETRY=m -CONFIG_INTEL_TURBO_MAX_3=y -CONFIG_INTEL_VBTN=m -CONFIG_INTEL_WMI_THUNDERBOLT=m -CONFIG_INTEL_XWAY_PHY=m -# CONFIG_INV_MPU6050_I2C is not set -# CONFIG_IOMMU_DEBUG is not set -CONFIG_IONIC=m -CONFIG_IOSCHED_BFQ=y -CONFIG_IP6_NF_FILTER=m -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MANGLE=m -CONFIG_IP6_NF_MATCH_AH=m -CONFIG_IP6_NF_MATCH_EUI64=m -CONFIG_IP6_NF_MATCH_FRAG=m -CONFIG_IP6_NF_MATCH_HL=m -CONFIG_IP6_NF_MATCH_IPV6HEADER=m -CONFIG_IP6_NF_MATCH_MH=m -CONFIG_IP6_NF_MATCH_OPTS=m -CONFIG_IP6_NF_MATCH_RPFILTER=m -CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_MATCH_SRH=m -CONFIG_IP6_NF_NAT=m -CONFIG_IP6_NF_RAW=m -CONFIG_IP6_NF_TARGET_HL=m -CONFIG_IP6_NF_TARGET_MASQUERADE=m -CONFIG_IP6_NF_TARGET_NPT=m -CONFIG_IP6_NF_TARGET_REJECT=m -CONFIG_IP6_NF_TARGET_SYNPROXY=m -CONFIG_IP_FIB_TRIE_STATS=y -# CONFIG_IPMB_DEVICE_INTERFACE is not set -CONFIG_IPMI_DEVICE_INTERFACE=m -CONFIG_IPMI_DMI_DECODE=y -CONFIG_IPMI_HANDLER=m -# CONFIG_IPMI_PANIC_EVENT is not set -CONFIG_IPMI_PLAT_DATA=y -# CONFIG_IPMI_POWEROFF is not set -CONFIG_IPMI_SI=m -# CONFIG_IPMI_SSIF is not set -# CONFIG_IPMI_WATCHDOG is not set -CONFIG_IP_NF_ARPFILTER=m -CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_IP_NF_ARPTABLES=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_MATCH_AH=m -CONFIG_IP_NF_MATCH_ECN=m -CONFIG_IP_NF_MATCH_RPFILTER=m -CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_RAW=m -CONFIG_IP_NF_TARGET_CLUSTERIP=m -CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_NETMAP=m -CONFIG_IP_NF_TARGET_REDIRECT=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_TARGET_SYNPROXY=m -CONFIG_IP_NF_TARGET_TTL=m -# CONFIG_IP_PIMSM_V1 is not set -# CONFIG_IP_PIMSM_V2 is not set -# CONFIG_IP_PNP is not set -CONFIG_IP_ROUTE_CLASSID=y -CONFIG_IP_SCTP=m -CONFIG_IP_SET_BITMAP_IP=m -CONFIG_IP_SET_BITMAP_IPMAC=m -CONFIG_IP_SET_BITMAP_PORT=m -CONFIG_IP_SET_HASH_IP=m -CONFIG_IP_SET_HASH_IPMAC=m -CONFIG_IP_SET_HASH_IPMARK=m -CONFIG_IP_SET_HASH_IPPORTIP=m -CONFIG_IP_SET_HASH_IPPORT=m -CONFIG_IP_SET_HASH_IPPORTNET=m -CONFIG_IP_SET_HASH_MAC=m -CONFIG_IP_SET_HASH_NETIFACE=m -CONFIG_IP_SET_HASH_NET=m -CONFIG_IP_SET_HASH_NETNET=m -CONFIG_IP_SET_HASH_NETPORT=m -CONFIG_IP_SET_HASH_NETPORTNET=m -CONFIG_IP_SET_LIST_SET=m 
-CONFIG_IP_SET=m -CONFIG_IP_SET_MAX=256 -CONFIG_IPV6_GRE=m -CONFIG_IPV6_ILA=m -CONFIG_IPV6=m -CONFIG_IPV6_MIP6=m -CONFIG_IPV6_MULTIPLE_TABLES=y -# CONFIG_IPV6_ROUTE_INFO is not set -CONFIG_IPV6_ROUTER_PREF=y -CONFIG_IPV6_SEG6_HMAC=y -CONFIG_IPV6_SEG6_LWTUNNEL=y -CONFIG_IPV6_SIT=m -CONFIG_IPV6_SUBTREES=y -CONFIG_IPV6_TUNNEL=m -CONFIG_IPV6_VTI=m -CONFIG_IPVLAN_L3S=y -CONFIG_IPVLAN=m -CONFIG_IPVTAP=m -# CONFIG_IPW2100_DEBUG is not set -CONFIG_IPW2100=m -CONFIG_IPW2100_MONITOR=y -# CONFIG_IPW2200_DEBUG is not set -CONFIG_IPW2200=m -CONFIG_IPW2200_MONITOR=y -CONFIG_IPW2200_PROMISCUOUS=y -CONFIG_IPW2200_QOS=y -CONFIG_IPW2200_RADIOTAP=y -CONFIG_IRQ_BYPASS_MANAGER=m -CONFIG_IRQ_POLL=y -CONFIG_IRQ_REMAP=y -CONFIG_ISA_BUS_API=y -# CONFIG_ISA_BUS is not set -CONFIG_ISCSI_BOOT_SYSFS=m -# CONFIG_ISCSI_IBFT is not set -CONFIG_ISCSI_TCP=m -# CONFIG_ISL29125 is not set -# CONFIG_ISL29501 is not set -CONFIG_ISO9660_FS=m -CONFIG_IT8712F_WDT=m -CONFIG_IT87_WDT=m -CONFIG_ITCO_VENDOR_SUPPORT=y -CONFIG_ITCO_WDT=m -# CONFIG_ITG3200 is not set -CONFIG_IWL3945=m -CONFIG_IWL4965=m -CONFIG_IWLDVM=m -# CONFIG_IWLEGACY_DEBUG is not set -CONFIG_IWLEGACY=m -CONFIG_IWLMVM=m -CONFIG_IWLWIFI_BCAST_FILTERING=y -# CONFIG_IWLWIFI_DEBUG is not set -CONFIG_IWLWIFI_LEDS=y -CONFIG_IWLWIFI=m -CONFIG_IWLWIFI_OPMODE_MODULAR=y -CONFIG_IXGBE_DCA=y -CONFIG_IXGBE_HWMON=y -CONFIG_IXGBE_IPSEC=y -CONFIG_IXGBE=m -CONFIG_IXGBEVF_IPSEC=y -CONFIG_IXGBEVF=m -CONFIG_IXGB=m -# CONFIG_JAILHOUSE_GUEST is not set -CONFIG_JBD2=m -# CONFIG_JFS_DEBUG is not set -CONFIG_JFS_FS=m -CONFIG_JFS_POSIX_ACL=y -CONFIG_JFS_SECURITY=y -CONFIG_JFS_STATISTICS=y -CONFIG_JME=m -CONFIG_JOYSTICK_A3D=m -CONFIG_JOYSTICK_ADI=m -CONFIG_JOYSTICK_ANALOG=m -CONFIG_JOYSTICK_AS5011=m -CONFIG_JOYSTICK_COBRA=m -CONFIG_JOYSTICK_DB9=m -CONFIG_JOYSTICK_FSIA6B=m -CONFIG_JOYSTICK_GAMECON=m -CONFIG_JOYSTICK_GF2K=m -CONFIG_JOYSTICK_GRIP=m -CONFIG_JOYSTICK_GRIP_MP=m -CONFIG_JOYSTICK_GUILLEMOT=m -CONFIG_JOYSTICK_IFORCE_232=m -CONFIG_JOYSTICK_IFORCE=m -CONFIG_JOYSTICK_IFORCE_USB=m -CONFIG_JOYSTICK_INTERACT=m -CONFIG_JOYSTICK_JOYDUMP=m -CONFIG_JOYSTICK_MAGELLAN=m -CONFIG_JOYSTICK_PXRC=m -CONFIG_JOYSTICK_SIDEWINDER=m -CONFIG_JOYSTICK_SPACEBALL=m -CONFIG_JOYSTICK_SPACEORB=m -CONFIG_JOYSTICK_STINGER=m -CONFIG_JOYSTICK_TMDC=m -CONFIG_JOYSTICK_TURBOGRAFX=m -CONFIG_JOYSTICK_TWIDJOY=m -# CONFIG_JOYSTICK_WALKERA0701 is not set -CONFIG_JOYSTICK_WARRIOR=m -CONFIG_JOYSTICK_XPAD_FF=y -CONFIG_JOYSTICK_XPAD_LEDS=y -CONFIG_JOYSTICK_XPAD=m -CONFIG_JOYSTICK_ZHENHUA=m -# CONFIG_JSA1212 is not set -# CONFIG_JUMP_LABEL is not set -# CONFIG_KARMA_PARTITION is not set -# CONFIG_KERNEL_GZIP is not set -CONFIG_KERNEL_XZ=y -CONFIG_KEYBOARD_ADC=m -CONFIG_KEYBOARD_QT1050=m -CONFIG_KEYBOARD_TM2_TOUCHKEY=m -# CONFIG_KMX61 is not set -# CONFIG_KPC2000 is not set -# CONFIG_KPROBES is not set -# CONFIG_KS7010 is not set -CONFIG_KS8842=m -CONFIG_KS8851_MLL=m -CONFIG_KSM=y -CONFIG_KSZ884X_PCI=m -CONFIG_KVM_AMD=m -CONFIG_KVM_AMD_SEV=y -CONFIG_KVM_ASYNC_PF=y -CONFIG_KVM_COMPAT=y -# CONFIG_KVM_DEBUG_FS is not set -CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT=y -CONFIG_KVM_GUEST=y -CONFIG_KVM_INTEL=m -CONFIG_KVM=m -CONFIG_KVM_MMIO=y -CONFIG_KVM_VFIO=y -# CONFIG_KXCJK1013 is not set -# CONFIG_KXSD9 is not set -CONFIG_LAN743X=m -CONFIG_LCD_CLASS_DEVICE=m -# CONFIG_LCD_PLATFORM is not set -# CONFIG_LDM_PARTITION is not set -CONFIG_LEDS_BD2802=m -CONFIG_LEDS_BLINKM=m -CONFIG_LEDS_CLEVO_MAIL=m -CONFIG_LEDS_INTEL_SS4200=m -CONFIG_LEDS_LM3530=m -CONFIG_LEDS_LM3532=m -CONFIG_LEDS_LM355x=m -CONFIG_LEDS_LP3944=m -CONFIG_LEDS_LP5521=m 
-CONFIG_LEDS_LP5523=m -CONFIG_LEDS_LP5562=m -CONFIG_LEDS_LP55XX_COMMON=m -CONFIG_LEDS_LP8501=m -CONFIG_LEDS_MLXREG=m -CONFIG_LEDS_NIC78BX=m -CONFIG_LEDS_PCA9532=m -CONFIG_LEDS_PCA955X=m -CONFIG_LEDS_PCA963X=m -CONFIG_LEDS_REGULATOR=m -CONFIG_LEDS_TCA6507=m -CONFIG_LEDS_TLC591XX=m -CONFIG_LEDS_TRIGGER_AUDIO=m -CONFIG_LEDS_TRIGGER_BACKLIGHT=m -CONFIG_LEDS_TRIGGER_CAMERA=m -CONFIG_LEDS_TRIGGER_DEFAULT_ON=m -CONFIG_LEDS_TRIGGER_HEARTBEAT=m -CONFIG_LEDS_TRIGGER_ONESHOT=m -CONFIG_LEDS_TRIGGER_TIMER=m -CONFIG_LEDS_TRIGGER_TRANSIENT=m -CONFIG_LEDS_USER=m -CONFIG_LEGACY_VSYSCALL_EMULATE=y -# CONFIG_LEGACY_VSYSCALL_XONLY is not set -CONFIG_LG_LAPTOP=m -CONFIG_LIB80211_CRYPT_CCMP=m -CONFIG_LIB80211_CRYPT_TKIP=m -CONFIG_LIB80211_CRYPT_WEP=m -# CONFIG_LIB80211_DEBUG is not set -CONFIG_LIB80211=m -CONFIG_LIBCRC32C=m -CONFIG_LIBERTAS_CS=m -# CONFIG_LIBERTAS_DEBUG is not set -CONFIG_LIBERTAS=m -# CONFIG_LIBERTAS_MESH is not set -CONFIG_LIBERTAS_SDIO=m -# CONFIG_LIBERTAS_THINFIRM_DEBUG is not set -CONFIG_LIBERTAS_THINFIRM=m -CONFIG_LIBERTAS_THINFIRM_USB=m -CONFIG_LIBERTAS_USB=m -CONFIG_LIBFC=m -CONFIG_LIBFCOE=m -# CONFIG_LIBIPW_DEBUG is not set -CONFIG_LIBIPW=m -CONFIG_LIBNVDIMM=y -# CONFIG_LIDAR_LITE_V2 is not set -CONFIG_LIQUIDIO=m -CONFIG_LIQUIDIO_VF=m -# CONFIG_LIRC is not set -CONFIG_LLC=m -# CONFIG_LMP91000 is not set -CONFIG_LOCKD=m -CONFIG_LOG_BUF_SHIFT=15 -CONFIG_LOGIG940_FF=y -CONFIG_LOGIRUMBLEPAD2_FF=y -# CONFIG_LOGO is not set -CONFIG_LPC_ICH=m -# CONFIG_LP_CONSOLE is not set -CONFIG_LPC_SCH=m -CONFIG_LSI_ET1011C_PHY=m -CONFIG_LSM="yama,loadpin,safesetid,integrity" -# CONFIG_LTC2471 is not set -# CONFIG_LTC2485 is not set -# CONFIG_LTC2497 is not set -CONFIG_LTE_GDM724X=m -# CONFIG_LTR501 is not set -# CONFIG_LV0104CS is not set -CONFIG_LWTUNNEL_BPF=y -CONFIG_LWTUNNEL=y -CONFIG_LXT_PHY=m -CONFIG_LZ4_COMPRESS=y -CONFIG_LZ4HC_COMPRESS=y -# CONFIG_M62332 is not set -CONFIG_MAC80211_HWSIM=m -CONFIG_MAC80211=m -CONFIG_MAC80211_MESH=y -CONFIG_MACB=m -CONFIG_MACB_PCI=m -CONFIG_MACB_USE_HWSTAMP=y -CONFIG_MACHZ_WDT=m -# CONFIG_MAC_PARTITION is not set -CONFIG_MACSEC=m -CONFIG_MACVLAN=m -CONFIG_MACVTAP=m -# CONFIG_MAG3110 is not set -CONFIG_MANTIS_CORE=m -CONFIG_MARVELL_10G_PHY=m -CONFIG_MARVELL_PHY=m -# CONFIG_MAX1363 is not set -# CONFIG_MAX30100 is not set -# CONFIG_MAX30102 is not set -# CONFIG_MAX44000 is not set -# CONFIG_MAX44009 is not set -# CONFIG_MAX517 is not set -# CONFIG_MAX5432 is not set -CONFIG_MAX63XX_WATCHDOG=m -# CONFIG_MAX9611 is not set -# CONFIG_MB1232 is not set -# CONFIG_MC3230 is not set -# CONFIG_MCP3422 is not set -# CONFIG_MCP4018 is not set -# CONFIG_MCP4531 is not set -# CONFIG_MCP4725 is not set -# CONFIG_MD_CLUSTER is not set -CONFIG_MDIO_BCM_UNIMAC=m -CONFIG_MDIO_BITBANG=m -CONFIG_MDIO_BUS=m -CONFIG_MDIO_CAVIUM=m -CONFIG_MDIO_DEVICE=m -CONFIG_MDIO_I2C=m -CONFIG_MDIO=m -CONFIG_MDIO_THUNDER=m -CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m -CONFIG_MD_RAID10=m -CONFIG_MD_RAID1=m -CONFIG_MD_RAID456=m -CONFIG_MEDIA_ALTERA_CI=m -CONFIG_MEDIA_ANALOG_TV_SUPPORT=y -CONFIG_MEDIA_ATTACH=y -CONFIG_MEDIA_CAMERA_SUPPORT=y -# CONFIG_MEDIA_CEC_RC is not set -# CONFIG_MEDIA_CEC_SUPPORT is not set -CONFIG_MEDIA_COMMON_OPTIONS=y -CONFIG_MEDIA_CONTROLLER_DVB=y -CONFIG_MEDIA_CONTROLLER=y -CONFIG_MEDIA_DIGITAL_TV_SUPPORT=y -CONFIG_MEDIA_PCI_SUPPORT=y -# CONFIG_MEDIA_RADIO_SUPPORT is not set -CONFIG_MEDIA_SDR_SUPPORT=y -CONFIG_MEDIA_SUBDRV_AUTOSELECT=y -CONFIG_MEDIA_SUPPORT=m -CONFIG_MEDIA_TUNER_E4000=m -CONFIG_MEDIA_TUNER_FC0011=m -CONFIG_MEDIA_TUNER_FC0012=m -CONFIG_MEDIA_TUNER_FC0013=m 
-CONFIG_MEDIA_TUNER_FC2580=m -CONFIG_MEDIA_TUNER_IT913X=m -CONFIG_MEDIA_TUNER=m -CONFIG_MEDIA_TUNER_M88RS6000T=m -CONFIG_MEDIA_TUNER_MAX2165=m -CONFIG_MEDIA_TUNER_MC44S803=m -CONFIG_MEDIA_TUNER_MT2060=m -CONFIG_MEDIA_TUNER_MT2063=m -CONFIG_MEDIA_TUNER_MT20XX=m -CONFIG_MEDIA_TUNER_MT2131=m -CONFIG_MEDIA_TUNER_MT2266=m -CONFIG_MEDIA_TUNER_MXL301RF=m -CONFIG_MEDIA_TUNER_MXL5005S=m -CONFIG_MEDIA_TUNER_MXL5007T=m -CONFIG_MEDIA_TUNER_QM1D1B0004=m -CONFIG_MEDIA_TUNER_QM1D1C0042=m -CONFIG_MEDIA_TUNER_QT1010=m -CONFIG_MEDIA_TUNER_R820T=m -CONFIG_MEDIA_TUNER_SI2157=m -CONFIG_MEDIA_TUNER_SIMPLE=m -CONFIG_MEDIA_TUNER_TDA18212=m -CONFIG_MEDIA_TUNER_TDA18218=m -CONFIG_MEDIA_TUNER_TDA18250=m -CONFIG_MEDIA_TUNER_TDA18271=m -CONFIG_MEDIA_TUNER_TDA827X=m -CONFIG_MEDIA_TUNER_TDA8290=m -CONFIG_MEDIA_TUNER_TDA9887=m -CONFIG_MEDIA_TUNER_TEA5761=m -CONFIG_MEDIA_TUNER_TEA5767=m -CONFIG_MEDIA_TUNER_TUA9001=m -CONFIG_MEDIA_TUNER_XC2028=m -CONFIG_MEDIA_TUNER_XC4000=m -CONFIG_MEDIA_TUNER_XC5000=m -CONFIG_MEDIA_USB_SUPPORT=y -CONFIG_MEGARAID_LEGACY=m -CONFIG_MEGARAID_MAILBOX=m -CONFIG_MEGARAID_MM=m -CONFIG_MEGARAID_NEWGEN=y -CONFIG_MEGARAID_SAS=m -CONFIG_MEMCG_KMEM=y -CONFIG_MEMCG_SWAP_ENABLED=y -CONFIG_MEMCG_SWAP=y -CONFIG_MEMCG=y -CONFIG_MEMORY_BALLOON=y -# CONFIG_MEM_SOFT_DIRTY is not set -# CONFIG_MEMSTICK_DEBUG is not set -CONFIG_MEMSTICK_JMICRON_38X=m -CONFIG_MEMSTICK=m -CONFIG_MEMSTICK_R592=m -CONFIG_MEMSTICK_REALTEK_PCI=m -CONFIG_MEMSTICK_REALTEK_USB=m -CONFIG_MEMSTICK_TIFM_MS=m -# CONFIG_MEMSTICK_UNSAFE_RESUME is not set -CONFIG_MFD_88PM800=m -CONFIG_MFD_88PM805=m -CONFIG_MFD_AXP20X_I2C=m -CONFIG_MFD_AXP20X=m -CONFIG_MFD_BCM590XX=m -CONFIG_MFD_CORE=m -CONFIG_MFD_DA9062=m -CONFIG_MFD_DA9063=m -CONFIG_MFD_DA9150=m -CONFIG_MFD_DLN2=m -CONFIG_MFD_INTEL_LPSS_ACPI=m -CONFIG_MFD_INTEL_LPSS=m -CONFIG_MFD_INTEL_LPSS_PCI=m -CONFIG_MFD_MAX14577=m -CONFIG_MFD_MAX77693=m -CONFIG_MFD_MAX8907=m -CONFIG_MFD_RETU=m -CONFIG_MFD_RT5033=m -CONFIG_MFD_SYSCON=y -CONFIG_MFD_TI_LP873X=m -CONFIG_MFD_TPS65086=m -CONFIG_MFD_TPS65912_I2C=m -CONFIG_MFD_TPS65912=m -CONFIG_MFD_TQMX86=m -CONFIG_MFD_VIPERBOARD=m -CONFIG_MFD_WL1273_CORE=m -CONFIG_MICREL_PHY=m -CONFIG_MICROCHIP_PHY=m -CONFIG_MICROCHIP_T1_PHY=m -CONFIG_MICROCODE_OLD_INTERFACE=y -CONFIG_MICROSEMI_PHY=m -CONFIG_MII=m -# CONFIG_MINIX_SUBPARTITION is not set -CONFIG_MISC_RTSX=m -CONFIG_MISC_RTSX_PCI=m -CONFIG_MISC_RTSX_USB=m -CONFIG_MLX4_CORE_GEN2=y -CONFIG_MLX4_CORE=m -CONFIG_MLX4_DEBUG=y -CONFIG_MLX4_EN=m -# CONFIG_MLX90614 is not set -# CONFIG_MLX90632 is not set -CONFIG_MLXFW=m -CONFIG_MLX_PLATFORM=m -CONFIG_MLXSW_CORE_HWMON=y -CONFIG_MLXSW_CORE=m -CONFIG_MLXSW_CORE_THERMAL=y -CONFIG_MLXSW_I2C=m -CONFIG_MLXSW_MINIMAL=m -CONFIG_MLXSW_PCI=m -CONFIG_MLXSW_SPECTRUM=m -CONFIG_MLXSW_SWITCHIB=m -CONFIG_MLXSW_SWITCHX2=m -# CONFIG_MMA7455_I2C is not set -CONFIG_MMA7660=m -# CONFIG_MMA8452 is not set -# CONFIG_MMA9551 is not set -# CONFIG_MMA9553 is not set -CONFIG_MMC35240=m -CONFIG_MMC_BLOCK=m -CONFIG_MMC_BLOCK_MINORS=8 -CONFIG_MMC_CB710=m -CONFIG_MMC_CQHCI=m -# CONFIG_MMC_DEBUG is not set -CONFIG_MMC=m -CONFIG_MMC_MTK=m -CONFIG_MMC_REALTEK_PCI=m -CONFIG_MMC_REALTEK_USB=m -CONFIG_MMC_RICOH_MMC=y -CONFIG_MMC_SDHCI_ACPI=m -CONFIG_MMC_SDHCI_IO_ACCESSORS=y -CONFIG_MMC_SDHCI=m -CONFIG_MMC_SDHCI_PCI=m -# CONFIG_MMC_SDHCI_PLTFM is not set -CONFIG_MMC_SDRICOH_CS=m -# CONFIG_MMC_TEST is not set -CONFIG_MMC_TIFM_SD=m -CONFIG_MMC_TOSHIBA_PCI=m -CONFIG_MMC_USDHI6ROL0=m -CONFIG_MMC_USHC=m -CONFIG_MMC_VIA_SDMMC=m -CONFIG_MMC_VUB300=m -CONFIG_MMC_WBSD=m -# CONFIG_MODULE_COMPRESS_GZIP is not set 
-CONFIG_MODULE_COMPRESS_XZ=y -CONFIG_MODULE_COMPRESS=y -# CONFIG_MODULE_FORCE_UNLOAD is not set -# CONFIG_MOST is not set -CONFIG_MOUSE_APPLETOUCH=m -CONFIG_MOUSE_BCM5974=m -CONFIG_MOUSE_CYAPA=m -CONFIG_MOUSE_ELAN_I2C_I2C=y -CONFIG_MOUSE_ELAN_I2C=m -CONFIG_MOUSE_ELAN_I2C_SMBUS=y -CONFIG_MOUSE_PS2_ELANTECH=y -CONFIG_MOUSE_PS2_SENTELIC=y -CONFIG_MOUSE_PS2_TOUCHKIT=y -CONFIG_MOUSE_PS2_VMMOUSE=y -CONFIG_MOUSE_SYNAPTICS_USB=m -# CONFIG_MPL115_I2C is not set -# CONFIG_MPL3115 is not set -CONFIG_MPLS_IPTUNNEL=m -CONFIG_MPLS_ROUTING=m -CONFIG_MPLS=y -# CONFIG_MPU3050_I2C is not set -# CONFIG_MS5611 is not set -# CONFIG_MS5637 is not set -CONFIG_MS_BLOCK=m -CONFIG_MSCC_OCELOT_SWITCH=m -CONFIG_MSDOS_FS=m -CONFIG_MSI_LAPTOP=m -CONFIG_MSI_WMI=m -CONFIG_MSPRO_BLOCK=m -CONFIG_MT7601U=m -CONFIG_MT7603E=m -CONFIG_MT7615E=m -CONFIG_MT76_CORE=m -CONFIG_MT76_LEDS=y -CONFIG_MT76_USB=m -CONFIG_MT76x02_LIB=m -CONFIG_MT76x02_USB=m -CONFIG_MT76x0_COMMON=m -CONFIG_MT76x0E=m -CONFIG_MT76x0U=m -CONFIG_MT76x2_COMMON=m -CONFIG_MT76x2E=m -CONFIG_MT76x2U=m -CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT=0 -CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT=1 -CONFIG_MTRR_SANITIZER=y -CONFIG_MUSB_PIO_ONLY=y -CONFIG_MVMDIO=m -CONFIG_MWAVE=m -CONFIG_MWIFIEX=m -CONFIG_MWIFIEX_PCIE=m -CONFIG_MWIFIEX_SDIO=m -CONFIG_MWIFIEX_USB=m -CONFIG_MWL8K=m -# CONFIG_MXC4005 is not set -# CONFIG_MXC6255 is not set -CONFIG_MXM_WMI=m -CONFIG_MYRI10GE_DCA=y -CONFIG_MYRI10GE=m -CONFIG_NATIONAL_PHY=m -CONFIG_NATSEMI=m -# CONFIG_NAU7802 is not set -CONFIG_ND_BLK=m -CONFIG_ND_BTT=m -CONFIG_ND_CLAIM=y -CONFIG_NE2K_PCI=m -CONFIG_NET_ACT_CONNMARK=m -CONFIG_NET_ACT_CSUM=m -CONFIG_NET_ACT_CTINFO=m -CONFIG_NET_ACT_CT=m -CONFIG_NET_ACT_GACT=m -CONFIG_NET_ACT_IFE=m -CONFIG_NET_ACT_IPT=m -CONFIG_NET_ACT_MIRRED=m -CONFIG_NET_ACT_MPLS=m -CONFIG_NET_ACT_NAT=m -CONFIG_NET_ACT_PEDIT=m -CONFIG_NET_ACT_POLICE=m -CONFIG_NET_ACT_SAMPLE=m -CONFIG_NET_ACT_SIMP=m -CONFIG_NET_ACT_SKBEDIT=m -CONFIG_NET_ACT_SKBMOD=m -CONFIG_NET_ACT_TUNNEL_KEY=m -CONFIG_NET_ACT_VLAN=m -CONFIG_NET_CLS_BASIC=m -CONFIG_NET_CLS_BPF=m -CONFIG_NET_CLS_CGROUP=m -CONFIG_NET_CLS_FLOWER=m -CONFIG_NET_CLS_FLOW=m -CONFIG_NET_CLS_FW=m -CONFIG_NET_CLS_MATCHALL=m -CONFIG_NET_CLS_ROUTE4=m -CONFIG_NET_CLS_RSVP6=m -CONFIG_NET_CLS_RSVP=m -CONFIG_NET_CLS_TCINDEX=m -CONFIG_NET_CLS_U32=m -# CONFIG_NETCONSOLE is not set -CONFIG_NET_DEVLINK=y -CONFIG_NETDEVSIM=m -CONFIG_NET_DSA_BCM_SF2=m -CONFIG_NET_DSA_LANTIQ_GSWIP=m -CONFIG_NET_DSA_LOOP=m -CONFIG_NET_DSA=m -CONFIG_NET_DSA_MICROCHIP_KSZ8795=m -# CONFIG_NET_DSA_MICROCHIP_KSZ9477 is not set -CONFIG_NET_DSA_MICROCHIP_KSZ_COMMON=m -CONFIG_NET_DSA_MT7530=m -CONFIG_NET_DSA_MV88E6060=m -CONFIG_NET_DSA_MV88E6XXX_GLOBAL2=y -CONFIG_NET_DSA_MV88E6XXX=m -CONFIG_NET_DSA_MV88E6XXX_PTP=y -CONFIG_NET_DSA_QCA8K=m -CONFIG_NET_DSA_REALTEK_SMI=m -CONFIG_NET_DSA_SMSC_LAN9303_I2C=m -CONFIG_NET_DSA_SMSC_LAN9303=m -CONFIG_NET_DSA_SMSC_LAN9303_MDIO=m -CONFIG_NET_DSA_TAG_8021Q=m -CONFIG_NET_DSA_TAG_BRCM_COMMON=m -CONFIG_NET_DSA_TAG_BRCM=m -CONFIG_NET_DSA_TAG_BRCM_PREPEND=m -CONFIG_NET_DSA_TAG_DSA=m -CONFIG_NET_DSA_TAG_EDSA=m -CONFIG_NET_DSA_TAG_GSWIP=m -CONFIG_NET_DSA_TAG_KSZ=m -CONFIG_NET_DSA_TAG_LAN9303=m -CONFIG_NET_DSA_TAG_MTK=m -CONFIG_NET_DSA_TAG_QCA=m -CONFIG_NET_DSA_TAG_SJA1105=m -CONFIG_NET_DSA_TAG_TRAILER=m -CONFIG_NET_EGRESS=y -CONFIG_NET_EMATCH_CMP=m -CONFIG_NET_EMATCH_IPSET=m -CONFIG_NET_EMATCH_IPT=m -CONFIG_NET_EMATCH_META=m -CONFIG_NET_EMATCH_NBYTE=m -CONFIG_NET_EMATCH_TEXT=m -CONFIG_NET_EMATCH_U32=m -CONFIG_NET_FAILOVER=m -CONFIG_NETFILTER_ADVANCED=y -CONFIG_NETFILTER_CONNCOUNT=m 
-CONFIG_NETFILTER_FAMILY_ARP=y -CONFIG_NETFILTER_FAMILY_BRIDGE=y -CONFIG_NETFILTER_NETLINK_ACCT=m -CONFIG_NETFILTER_NETLINK_GLUE_CT=y -CONFIG_NETFILTER_NETLINK_LOG=m -CONFIG_NETFILTER_NETLINK=m -CONFIG_NETFILTER_NETLINK_OSF=m -CONFIG_NETFILTER_NETLINK_QUEUE=m -CONFIG_NETFILTER_SYNPROXY=m -CONFIG_NETFILTER_XTABLES=m -CONFIG_NETFILTER_XT_CONNMARK=m -CONFIG_NETFILTER_XT_MATCH_BPF=m -CONFIG_NETFILTER_XT_MATCH_CGROUP=m -CONFIG_NETFILTER_XT_MATCH_CLUSTER=m -CONFIG_NETFILTER_XT_MATCH_COMMENT=m -CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m -CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m -CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m -CONFIG_NETFILTER_XT_MATCH_CONNMARK=m -CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m -CONFIG_NETFILTER_XT_MATCH_CPU=m -CONFIG_NETFILTER_XT_MATCH_DCCP=m -CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m -CONFIG_NETFILTER_XT_MATCH_DSCP=m -CONFIG_NETFILTER_XT_MATCH_ECN=m -CONFIG_NETFILTER_XT_MATCH_ESP=m -CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m -CONFIG_NETFILTER_XT_MATCH_HELPER=m -CONFIG_NETFILTER_XT_MATCH_HL=m -CONFIG_NETFILTER_XT_MATCH_IPCOMP=m -CONFIG_NETFILTER_XT_MATCH_IPRANGE=m -CONFIG_NETFILTER_XT_MATCH_L2TP=m -CONFIG_NETFILTER_XT_MATCH_LENGTH=m -CONFIG_NETFILTER_XT_MATCH_LIMIT=m -CONFIG_NETFILTER_XT_MATCH_MAC=m -CONFIG_NETFILTER_XT_MATCH_MARK=m -CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m -CONFIG_NETFILTER_XT_MATCH_NFACCT=m -CONFIG_NETFILTER_XT_MATCH_OSF=m -CONFIG_NETFILTER_XT_MATCH_OWNER=m -CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m -CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m -CONFIG_NETFILTER_XT_MATCH_POLICY=m -CONFIG_NETFILTER_XT_MATCH_QUOTA=m -CONFIG_NETFILTER_XT_MATCH_RATEEST=m -CONFIG_NETFILTER_XT_MATCH_REALM=m -CONFIG_NETFILTER_XT_MATCH_RECENT=m -CONFIG_NETFILTER_XT_MATCH_SCTP=m -CONFIG_NETFILTER_XT_MATCH_SOCKET=m -CONFIG_NETFILTER_XT_MATCH_STATE=m -CONFIG_NETFILTER_XT_MATCH_STATISTIC=m -CONFIG_NETFILTER_XT_MATCH_STRING=m -CONFIG_NETFILTER_XT_MATCH_TCPMSS=m -CONFIG_NETFILTER_XT_MATCH_TIME=m -CONFIG_NETFILTER_XT_MATCH_U32=m -CONFIG_NETFILTER_XT_SET=m -# CONFIG_NETFILTER_XT_TARGET_AUDIT is not set -CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m -CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m -CONFIG_NETFILTER_XT_TARGET_CONNMARK=m -CONFIG_NETFILTER_XT_TARGET_CT=m -CONFIG_NETFILTER_XT_TARGET_DSCP=m -CONFIG_NETFILTER_XT_TARGET_HL=m -CONFIG_NETFILTER_XT_TARGET_HMARK=m -CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m -CONFIG_NETFILTER_XT_TARGET_LED=m -CONFIG_NETFILTER_XT_TARGET_MARK=m -CONFIG_NETFILTER_XT_TARGET_NETMAP=m -CONFIG_NETFILTER_XT_TARGET_NFLOG=m -CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m -CONFIG_NETFILTER_XT_TARGET_RATEEST=m -CONFIG_NETFILTER_XT_TARGET_REDIRECT=m -CONFIG_NETFILTER_XT_TARGET_TCPMSS=m -CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m -CONFIG_NETFILTER_XT_TARGET_TEE=m -CONFIG_NETFILTER_XT_TARGET_TPROXY=m -CONFIG_NETFILTER_XT_TARGET_TRACE=m -CONFIG_NET_IFE=m -CONFIG_NET_IFE_SKBMARK=m -CONFIG_NET_IFE_SKBPRIO=m -CONFIG_NET_IFE_SKBTCINDEX=m -CONFIG_NET_IPGRE_BROADCAST=y -CONFIG_NET_IPGRE_DEMUX=m -CONFIG_NET_IPGRE=m -CONFIG_NET_IPIP=m -CONFIG_NET_IP_TUNNEL=m -CONFIG_NET_KEY=m -CONFIG_NET_KEY_MIGRATE=y -CONFIG_NET_L3_MASTER_DEV=y -CONFIG_NETLINK_DIAG=m -CONFIG_NET_MPLS_GSO=m -CONFIG_NET_NSH=m -CONFIG_NET_SCH_CAKE=m -CONFIG_NET_SCH_CBQ=m -CONFIG_NET_SCH_CBS=m -CONFIG_NET_SCH_CHOKE=m -CONFIG_NET_SCH_CODEL=m -CONFIG_NET_SCH_DRR=m -CONFIG_NET_SCH_DSMARK=m -CONFIG_NET_SCH_ETF=m -CONFIG_NET_SCH_FQ_CODEL=m -CONFIG_NET_SCH_FQ=m -CONFIG_NET_SCH_GRED=m -CONFIG_NET_SCH_HFSC=m -CONFIG_NET_SCH_HHF=m -CONFIG_NET_SCH_HTB=m -CONFIG_NET_SCH_INGRESS=m -CONFIG_NET_SCH_MQPRIO=m -CONFIG_NET_SCH_MULTIQ=m -CONFIG_NET_SCH_NETEM=m 
-CONFIG_NET_SCH_PIE=m -CONFIG_NET_SCH_PLUG=m -CONFIG_NET_SCH_PRIO=m -CONFIG_NET_SCH_QFQ=m -CONFIG_NET_SCH_RED=m -CONFIG_NET_SCH_SFB=m -CONFIG_NET_SCH_SFQ=m -CONFIG_NET_SCH_SKBPRIO=m -CONFIG_NET_SCH_TAPRIO=m -CONFIG_NET_SCH_TBF=m -CONFIG_NET_SCH_TEQL=m -CONFIG_NET_SOCK_MSG=y -CONFIG_NET_SWITCHDEV=y -CONFIG_NET_TEAM=m -CONFIG_NET_TEAM_MODE_ACTIVEBACKUP=m -CONFIG_NET_TEAM_MODE_BROADCAST=m -CONFIG_NET_TEAM_MODE_LOADBALANCE=m -CONFIG_NET_TEAM_MODE_RANDOM=m -CONFIG_NET_TEAM_MODE_ROUNDROBIN=m -CONFIG_NET_UDP_TUNNEL=m -CONFIG_NET_VRF=m -# CONFIG_NETWORK_SECMARK is not set -CONFIG_NETXEN_NIC=m -CONFIG_NF_CONNTRACK_AMANDA=m -CONFIG_NF_CONNTRACK_BRIDGE=m -CONFIG_NF_CONNTRACK_BROADCAST=m -CONFIG_NF_CONNTRACK_EVENTS=y -CONFIG_NF_CONNTRACK_FTP=m -CONFIG_NF_CONNTRACK_H323=m -CONFIG_NF_CONNTRACK_IRC=m -CONFIG_NF_CONNTRACK_LABELS=y -CONFIG_NF_CONNTRACK=m -CONFIG_NF_CONNTRACK_MARK=y -CONFIG_NF_CONNTRACK_NETBIOS_NS=m -CONFIG_NF_CONNTRACK_PPTP=m -CONFIG_NF_CONNTRACK_SANE=m -CONFIG_NF_CONNTRACK_SIP=m -CONFIG_NF_CONNTRACK_SNMP=m -CONFIG_NF_CONNTRACK_TFTP=m -CONFIG_NF_CONNTRACK_TIMEOUT=y -CONFIG_NF_CONNTRACK_TIMESTAMP=y -CONFIG_NF_CONNTRACK_ZONES=y -CONFIG_NF_CT_NETLINK_HELPER=m -CONFIG_NF_CT_NETLINK=m -CONFIG_NF_CT_NETLINK_TIMEOUT=m -CONFIG_NF_CT_PROTO_DCCP=y -CONFIG_NF_CT_PROTO_GRE=y -CONFIG_NF_CT_PROTO_SCTP=y -CONFIG_NF_CT_PROTO_UDPLITE=y -CONFIG_NF_DEFRAG_IPV4=m -CONFIG_NF_DEFRAG_IPV6=m -CONFIG_NF_DUP_IPV4=m -CONFIG_NF_DUP_IPV6=m -CONFIG_NF_DUP_NETDEV=m -CONFIG_NF_FLOW_TABLE_INET=m -CONFIG_NF_FLOW_TABLE_IPV4=m -CONFIG_NF_FLOW_TABLE_IPV6=m -CONFIG_NF_FLOW_TABLE=m -CONFIG_NF_LOG_BRIDGE=m -CONFIG_NF_LOG_NETDEV=m -CONFIG_NF_NAT_AMANDA=m -CONFIG_NF_NAT_FTP=m -CONFIG_NF_NAT_H323=m -CONFIG_NF_NAT_IRC=m -CONFIG_NF_NAT=m -CONFIG_NF_NAT_PPTP=m -CONFIG_NF_NAT_REDIRECT=y -CONFIG_NF_NAT_SIP=m -CONFIG_NF_NAT_SNMP_BASIC=m -CONFIG_NF_NAT_TFTP=m -CONFIG_NFP_APP_ABM_NIC=y -CONFIG_NFP_APP_FLOWER=y -# CONFIG_NFP_DEBUG is not set -CONFIG_NFP=m -CONFIG_NF_REJECT_IPV4=m -CONFIG_NF_REJECT_IPV6=m -CONFIG_NFS_ACL_SUPPORT=m -# CONFIG_NFSD_BLOCKLAYOUT is not set -CONFIG_NFSD_FLEXFILELAYOUT=y -CONFIG_NFSD=m -CONFIG_NFSD_PNFS=y -# CONFIG_NFSD_SCSILAYOUT is not set -CONFIG_NFSD_V2_ACL=y -CONFIG_NFSD_V3_ACL=y -CONFIG_NFSD_V3=y -CONFIG_NFSD_V4=y -CONFIG_NFS_FSCACHE=y -CONFIG_NFS_FS=m -CONFIG_NF_SOCKET_IPV4=m -CONFIG_NF_SOCKET_IPV6=m -CONFIG_NFS_V2=m -CONFIG_NFS_V3=m -CONFIG_NFS_V4=m -CONFIG_NF_TABLES_ARP=y -CONFIG_NF_TABLES_BRIDGE=m -CONFIG_NF_TABLES_INET=y -CONFIG_NF_TABLES_IPV4=y -CONFIG_NF_TABLES_IPV6=y -CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_NETDEV=y -CONFIG_NF_TABLES_SET=m -CONFIG_NFT_BRIDGE_META=m -CONFIG_NFT_BRIDGE_REJECT=m -CONFIG_NFT_COMPAT=m -CONFIG_NFT_CONNLIMIT=m -CONFIG_NFT_COUNTER=m -CONFIG_NFT_CT=m -CONFIG_NFT_DUP_IPV4=m -CONFIG_NFT_DUP_IPV6=m -CONFIG_NFT_DUP_NETDEV=m -CONFIG_NFT_FIB_INET=m -CONFIG_NFT_FIB_IPV4=m -CONFIG_NFT_FIB_IPV6=m -CONFIG_NFT_FIB=m -CONFIG_NFT_FIB_NETDEV=m -CONFIG_NFT_FLOW_OFFLOAD=m -CONFIG_NFT_FWD_NETDEV=m -CONFIG_NFT_HASH=m -CONFIG_NFT_LIMIT=m -CONFIG_NFT_LOG=m -CONFIG_NFT_MASQ=m -CONFIG_NFT_NAT=m -CONFIG_NFT_NUMGEN=m -CONFIG_NFT_OBJREF=m -CONFIG_NFT_OSF=m -CONFIG_NF_TPROXY_IPV4=m -CONFIG_NF_TPROXY_IPV6=m -CONFIG_NFT_QUEUE=m -CONFIG_NFT_QUOTA=m -CONFIG_NFT_REDIR=m -CONFIG_NFT_REJECT_INET=m -CONFIG_NFT_REJECT_IPV4=m -CONFIG_NFT_REJECT_IPV6=m -CONFIG_NFT_REJECT=m -CONFIG_NFT_SOCKET=m -CONFIG_NFT_SYNPROXY=m -CONFIG_NFT_TPROXY=m -CONFIG_NFT_TUNNEL=m -CONFIG_NFT_XFRM=m -CONFIG_NI903X_WDT=m -CONFIG_NIC7018_WDT=m -CONFIG_NILFS2_FS=m -CONFIG_NIU=m -CONFIG_NI_XGE_MANAGEMENT_ENET=m -CONFIG_NLMON=m 
-CONFIG_NLS_ASCII=m -CONFIG_NLS_CODEPAGE_1251=m -CONFIG_NLS_CODEPAGE_437=m -CONFIG_NLS_CODEPAGE_850=m -CONFIG_NLS_CODEPAGE_855=m -CONFIG_NLS_CODEPAGE_866=m -CONFIG_NLS_ISO8859_1=m -CONFIG_NLS_ISO8859_5=m -CONFIG_NLS_KOI8_R=m -CONFIG_NLS_KOI8_U=m -CONFIG_NLS_UTF8=m -# CONFIG_NOA1305 is not set -# CONFIG_NO_HZ is not set -CONFIG_NORTEL_HERMES=m -CONFIG_NOUVEAU_DEBUG=5 -CONFIG_NOUVEAU_DEBUG_DEFAULT=3 -# CONFIG_NOUVEAU_DEBUG_MMU is not set -CONFIG_NOUVEAU_LEGACY_CTX_SUPPORT=y -CONFIG_NS83820=m -# CONFIG_NTFS_DEBUG is not set -CONFIG_NTFS_FS=m -CONFIG_NTFS_RW=y -CONFIG_NVME_CORE=y -CONFIG_NVME_MULTIPATH=y -# CONFIG_NVME_TARGET is not set -# CONFIG_NVME_TCP is not set -# CONFIG_NVM_PBLK_DEBUG is not set -CONFIG_NVM_PBLK=m -CONFIG_NVM=y -CONFIG_NVRAM=m -CONFIG_NV_TCO=m -CONFIG_NXP_TJA11XX_PHY=m -CONFIG_OBJAGG=m -# CONFIG_OCFS2_DEBUG_FS is not set -CONFIG_OCFS2_DEBUG_MASKLOG=y -CONFIG_OCFS2_FS=m -CONFIG_OCFS2_FS_O2CB=m -CONFIG_OCFS2_FS_STATS=y -CONFIG_OCFS2_FS_USERSPACE_CLUSTER=m -CONFIG_OPENVSWITCH_GENEVE=m -CONFIG_OPENVSWITCH_GRE=m -CONFIG_OPENVSWITCH=m -CONFIG_OPENVSWITCH_VXLAN=m -# CONFIG_OPT3001 is not set -CONFIG_ORANGEFS_FS=m -CONFIG_ORINOCO_USB=m -# CONFIG_OSF_PARTITION is not set -CONFIG_OVERLAY_FS_INDEX=y -CONFIG_OVERLAY_FS=m -CONFIG_OVERLAY_FS_METACOPY=y -CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW=y -CONFIG_OVERLAY_FS_REDIRECT_DIR=y -# CONFIG_OVERLAY_FS_XINO_AUTO is not set -CONFIG_P54_COMMON=m -CONFIG_P54_LEDS=y -CONFIG_P54_PCI=m -CONFIG_P54_USB=m -# CONFIG_PA12203001 is not set -CONFIG_PACKET_DIAG=m -CONFIG_PACKING=y -CONFIG_PADATA=y -CONFIG_PAGE_COUNTER=y -CONFIG_PAGE_POOL=y -CONFIG_PANASONIC_LAPTOP=m -# CONFIG_PANEL is not set -CONFIG_PARAVIRT_CLOCK=y -# CONFIG_PARAVIRT_DEBUG is not set -CONFIG_PARAVIRT_SPINLOCKS=y -# CONFIG_PARAVIRT_TIME_ACCOUNTING is not set -CONFIG_PARAVIRT=y -# CONFIG_PARIDE is not set -CONFIG_PARMAN=m -CONFIG_PARPORT_1284=y -# CONFIG_PARPORT_AX88796 is not set -CONFIG_PARPORT_NOT_PC=y -# CONFIG_PARPORT_PC_FIFO is not set -CONFIG_PARPORT_PC=m -# CONFIG_PARPORT_PC_PCMCIA is not set -# CONFIG_PARPORT_PC_SUPERIO is not set -# CONFIG_PARPORT_SERIAL is not set -CONFIG_PARPORT=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_PATA_ACPI=m -CONFIG_PATA_ALI=m -CONFIG_PATA_AMD=m -CONFIG_PATA_ARTOP=m -CONFIG_PATA_ATIIXP=m -CONFIG_PATA_ATP867X=m -CONFIG_PATA_CMD640_PCI=m -CONFIG_PATA_CMD64X=m -CONFIG_PATA_CYPRESS=m -CONFIG_PATA_EFAR=m -CONFIG_PATA_HPT366=m -CONFIG_PATA_HPT37X=m -CONFIG_PATA_HPT3X2N=m -CONFIG_PATA_HPT3X3_DMA=y -CONFIG_PATA_HPT3X3=m -CONFIG_PATA_IT8213=m -CONFIG_PATA_IT821X=m -CONFIG_PATA_JMICRON=m -CONFIG_PATA_LEGACY=m -CONFIG_PATA_MARVELL=m -CONFIG_PATA_MPIIX=m -CONFIG_PATA_NETCELL=m -CONFIG_PATA_NINJA32=m -CONFIG_PATA_NS87410=m -CONFIG_PATA_NS87415=m -CONFIG_PATA_OLDPIIX=m -CONFIG_PATA_OPTIDMA=m -CONFIG_PATA_OPTI=m -CONFIG_PATA_PCMCIA=m -CONFIG_PATA_PDC2027X=m -CONFIG_PATA_PDC_OLD=m -CONFIG_PATA_PLATFORM=m -CONFIG_PATA_RADISYS=m -CONFIG_PATA_RDC=m -CONFIG_PATA_RZ1000=m -CONFIG_PATA_SCH=m -CONFIG_PATA_SERVERWORKS=m -CONFIG_PATA_SIL680=m -CONFIG_PATA_SIS=m -CONFIG_PATA_TOSHIBA=m -CONFIG_PATA_TRIFLEX=m -CONFIG_PATA_VIA=m -CONFIG_PATA_WINBOND=m -# CONFIG_PC104 is not set -CONFIG_PC87413_WDT=m -CONFIG_PCCARD=m -CONFIG_PCI_ATMEL=m -CONFIG_PCI_CNB20LE_QUIRK=y -CONFIG_PCIE_ECRC=y -CONFIG_PCI_HERMES=m -# CONFIG_PCI_HYPERV_INTERFACE is not set -# CONFIG_PCI_HYPERV is not set -CONFIG_PCI_IOV=y -CONFIG_PCIPCWATCHDOG=m -CONFIG_PCI_PF_STUB=m -# CONFIG_PCI_REALLOC_ENABLE_AUTO is not set -CONFIG_PCI_STUB=m -CONFIG_PCMCIA_3C574=m -CONFIG_PCMCIA_3C589=m -CONFIG_PCMCIA_AHA152X=m 
-CONFIG_PCMCIA_ATMEL=m -CONFIG_PCMCIA_AXNET=m -# CONFIG_PCMCIA_FDOMAIN is not set -CONFIG_PCMCIA_FMVJ18X=m -CONFIG_PCMCIA_HERMES=m -CONFIG_PCMCIA=m -CONFIG_PCMCIA_NMCLAN=m -CONFIG_PCMCIA_PCNET=m -CONFIG_PCMCIA_QLOGIC=m -CONFIG_PCMCIA_RAYCS=m -CONFIG_PCMCIA_SMC91C92=m -CONFIG_PCMCIA_SPECTRUM=m -CONFIG_PCMCIA_SYM53C500=m -CONFIG_PCMCIA_WL3501=m -CONFIG_PCMCIA_XIRC2PS=m -CONFIG_PCMCIA_XIRCOM=m -CONFIG_PCNET32=m -CONFIG_PD6729=m -CONFIG_PDC_ADMA=m -# CONFIG_PEAQ_WMI is not set -CONFIG_PERF_EVENTS_AMD_POWER=m -# CONFIG_PGTABLE_MAPPING is not set -CONFIG_PHANTOM=m -# CONFIG_PHY_CPCAP_USB is not set -CONFIG_PHYLIB=m -CONFIG_PHYLINK=m -CONFIG_PHY_PXA_28NM_HSIC=m -CONFIG_PHY_PXA_28NM_USB2=m -CONFIG_PHY_QCOM_USB_HSIC=m -CONFIG_PHY_QCOM_USB_HS=m -CONFIG_PHYSICAL_ALIGN=0x1000000 -CONFIG_PHY_TUSB1210=m -# CONFIG_PINCTRL_AMD is not set -# CONFIG_PINCTRL_BAYTRAIL is not set -# CONFIG_PINCTRL_BROXTON is not set -# CONFIG_PINCTRL_CANNONLAKE is not set -# CONFIG_PINCTRL_CEDARFORK is not set -# CONFIG_PINCTRL_CHERRYVIEW is not set -# CONFIG_PINCTRL_DENVERTON is not set -# CONFIG_PINCTRL_GEMINILAKE is not set -# CONFIG_PINCTRL_ICELAKE is not set -# CONFIG_PINCTRL_LEWISBURG is not set -# CONFIG_PINCTRL_MCP23S08 is not set -# CONFIG_PINCTRL_SUNRISEPOINT is not set -CONFIG_PINCTRL=y -CONFIG_PKCS8_PRIVATE_KEY_PARSER=m -# CONFIG_PLIP is not set -CONFIG_PLX_HERMES=m -CONFIG_PM_ADVANCED_DEBUG=y -CONFIG_PMBUS=m -# CONFIG_PM_DEVFREQ_EVENT is not set -CONFIG_PM_DEVFREQ=y -CONFIG_PM_GENERIC_DOMAINS_SLEEP=y -CONFIG_PM_GENERIC_DOMAINS=y -CONFIG_PM_OPP=y -# CONFIG_PMS7003 is not set -# CONFIG_PM_TRACE_RTC is not set -CONFIG_PPDEV=m -CONFIG_PPP_ASYNC=m -CONFIG_PPP_BSDCOMP=m -CONFIG_PPP_DEFLATE=m -CONFIG_PPP_FILTER=y -CONFIG_PPP_MPPE=m -CONFIG_PPP_MULTILINK=y -CONFIG_PPPOE=m -CONFIG_PPP_SYNC_TTY=m -CONFIG_PPP=y -# CONFIG_PPS_CLIENT_PARPORT is not set -CONFIG_PPTP=m -CONFIG_PREEMPT_COUNT=y -CONFIG_PREEMPTION=y -CONFIG_PREEMPT_NOTIFIERS=y -CONFIG_PREEMPT_RCU=y -# CONFIG_PREEMPT_VOLUNTARY is not set -CONFIG_PREEMPT=y -CONFIG_PRIME_NUMBERS=m -CONFIG_PRINTER=y -# CONFIG_PRINTK_TIME is not set -CONFIG_PRINT_QUOTA_WARNING=y -# CONFIG_PRISM2_USB is not set -CONFIG_PRISM54=m -CONFIG_PROC_CHILDREN=y -# CONFIG_PROCESSOR_SELECT is not set -# CONFIG_PROFILING is not set -# CONFIG_PROVIDE_OHCI1394_DMA_INIT is not set -CONFIG_PSAMPLE=m -CONFIG_PTP_1588_CLOCK_KVM=m -# CONFIG_PVH is not set -CONFIG_PVPANIC=m -CONFIG_QCOM_EMAC=m -CONFIG_QCOM_HIDMA=m -CONFIG_QCOM_HIDMA_MGMT=m -# CONFIG_QCOM_SPMI_ADC5 is not set -# CONFIG_QCOM_SPMI_IADC is not set -# CONFIG_QCOM_SPMI_VADC is not set -CONFIG_QEDE=m -CONFIG_QED_FCOE=y -CONFIG_QEDF=m -CONFIG_QEDI=m -CONFIG_QED_ISCSI=y -CONFIG_QED_LL2=y -CONFIG_QED=m -CONFIG_QED_OOO=y -CONFIG_QED_SRIOV=y -# CONFIG_QFMT_V2 is not set -CONFIG_QLA3XXX=m -CONFIG_QLCNIC_HWMON=y -CONFIG_QLCNIC=m -CONFIG_QLCNIC_SRIOV=y -CONFIG_QLGE=m -CONFIG_QSEMI_PHY=m -CONFIG_QTNFMAC=m -CONFIG_QTNFMAC_PCIE=m -# CONFIG_QUOTA_NETLINK_INTERFACE is not set -CONFIG_QUOTA_TREE=m -CONFIG_R6040=m -CONFIG_R8169=m -CONFIG_R8188EU=m -CONFIG_R8712U=m -CONFIG_RAID6_PQ_BENCHMARK=y -CONFIG_RAID6_PQ=m -CONFIG_RAID_ATTRS=m -# CONFIG_RANDOMIZE_BASE is not set -CONFIG_RANDOM_TRUST_BOOTLOADER=y -CONFIG_RANDOM_TRUST_CPU=y -# CONFIG_RAVE_SP_CORE is not set -CONFIG_RC_CORE=m -# CONFIG_RC_DECODERS is not set -# CONFIG_RC_DEVICES is not set -# CONFIG_RC_MAP is not set -CONFIG_RCU_CPU_STALL_TIMEOUT=60 -# CONFIG_RCU_TRACE is not set -# CONFIG_RDS_DEBUG is not set -CONFIG_RDS=m -CONFIG_RDS_TCP=m -CONFIG_REALTEK_AUTOPM=y -CONFIG_REALTEK_PHY=m -CONFIG_REGMAP_I2C=m 
-CONFIG_REGMAP_IRQ=y -CONFIG_REGMAP_MMIO=y -CONFIG_REGMAP_SCCB=m -# CONFIG_REGULATOR_88PG86X is not set -# CONFIG_REGULATOR_88PM800 is not set -# CONFIG_REGULATOR_ACT8865 is not set -# CONFIG_REGULATOR_AD5398 is not set -# CONFIG_REGULATOR_ANATOP is not set -# CONFIG_REGULATOR_AXP20X is not set -# CONFIG_REGULATOR_BCM590XX is not set -# CONFIG_REGULATOR_DA9062 is not set -# CONFIG_REGULATOR_DA9210 is not set -# CONFIG_REGULATOR_DA9211 is not set -# CONFIG_REGULATOR_DEBUG is not set -# CONFIG_REGULATOR_FAN53555 is not set -# CONFIG_REGULATOR_FIXED_VOLTAGE is not set -# CONFIG_REGULATOR_ISL6271A is not set -# CONFIG_REGULATOR_ISL9305 is not set -# CONFIG_REGULATOR_LP3971 is not set -# CONFIG_REGULATOR_LP3972 is not set -# CONFIG_REGULATOR_LP872X is not set -# CONFIG_REGULATOR_LP8755 is not set -# CONFIG_REGULATOR_LTC3589 is not set -# CONFIG_REGULATOR_LTC3676 is not set -# CONFIG_REGULATOR_MAX14577 is not set -# CONFIG_REGULATOR_MAX1586 is not set -# CONFIG_REGULATOR_MAX77693 is not set -# CONFIG_REGULATOR_MAX8649 is not set -# CONFIG_REGULATOR_MAX8660 is not set -# CONFIG_REGULATOR_MAX8907 is not set -# CONFIG_REGULATOR_MAX8952 is not set -# CONFIG_REGULATOR_MT6311 is not set -# CONFIG_REGULATOR_PFUZE100 is not set -# CONFIG_REGULATOR_PV88060 is not set -# CONFIG_REGULATOR_PV88080 is not set -# CONFIG_REGULATOR_PV88090 is not set -# CONFIG_REGULATOR_QCOM_SPMI is not set -# CONFIG_REGULATOR_RT5033 is not set -# CONFIG_REGULATOR_SLG51000 is not set -# CONFIG_REGULATOR_TPS51632 is not set -# CONFIG_REGULATOR_TPS62360 is not set -# CONFIG_REGULATOR_TPS65023 is not set -# CONFIG_REGULATOR_TPS6507X is not set -# CONFIG_REGULATOR_TPS65086 is not set -# CONFIG_REGULATOR_TPS65912 is not set -# CONFIG_REGULATOR_USERSPACE_CONSUMER is not set -# CONFIG_REGULATOR_VIRTUAL_CONSUMER is not set -CONFIG_REGULATOR=y -# CONFIG_REISERFS_CHECK is not set -CONFIG_REISERFS_FS=m -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_REISERFS_FS_SECURITY=y -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_PROC_INFO=y -CONFIG_RENESAS_PHY=m -CONFIG_RESET_CONTROLLER=y -# CONFIG_RESET_TI_SYSCON is not set -CONFIG_RETU_WATCHDOG=m -# CONFIG_RFD77402 is not set -CONFIG_RFKILL=m -CONFIG_RMI4_2D_SENSOR=y -CONFIG_RMI4_CORE=m -CONFIG_RMI4_F03_SERIO=m -CONFIG_RMI4_F03=y -CONFIG_RMI4_F11=y -CONFIG_RMI4_F12=y -CONFIG_RMI4_F30=y -# CONFIG_RMI4_F34 is not set -# CONFIG_RMI4_F54 is not set -# CONFIG_RMI4_F55 is not set -# CONFIG_RMI4_I2C is not set -# CONFIG_RMI4_SMB is not set -CONFIG_ROCKCHIP_PHY=m -CONFIG_ROCKER=m -CONFIG_RPCSEC_GSS_KRB5=m -# CONFIG_RPR0521 is not set -CONFIG_RSI_91X=m -CONFIG_RSI_COEX=y -# CONFIG_RSI_DEBUGFS is not set -CONFIG_RSI_SDIO=m -CONFIG_RSI_USB=m -CONFIG_RT2400PCI=m -CONFIG_RT2500PCI=m -CONFIG_RT2500USB=m -CONFIG_RT2800_LIB=m -CONFIG_RT2800_LIB_MMIO=m -CONFIG_RT2800PCI=m -CONFIG_RT2800PCI_RT3290=y -CONFIG_RT2800PCI_RT33XX=y -CONFIG_RT2800PCI_RT35XX=y -CONFIG_RT2800PCI_RT53XX=y -CONFIG_RT2800USB=m -CONFIG_RT2800USB_RT33XX=y -CONFIG_RT2800USB_RT3573=y -CONFIG_RT2800USB_RT35XX=y -CONFIG_RT2800USB_RT53XX=y -CONFIG_RT2800USB_RT55XX=y -CONFIG_RT2800USB_UNKNOWN=y -# CONFIG_RT2X00_DEBUG is not set -CONFIG_RT2X00_LIB_CRYPTO=y -CONFIG_RT2X00_LIB_FIRMWARE=y -CONFIG_RT2X00_LIB_LEDS=y -CONFIG_RT2X00_LIB=m -CONFIG_RT2X00_LIB_MMIO=m -CONFIG_RT2X00_LIB_PCI=m -CONFIG_RT2X00_LIB_USB=m -CONFIG_RT2X00=m -CONFIG_RT61PCI=m -CONFIG_RT73USB=m -CONFIG_RTC_DRV_88PM80X=m -CONFIG_RTC_DRV_ABEOZ9=m -# CONFIG_RTC_DRV_DA9063 is not set -# CONFIG_RTC_DRV_HID_SENSOR_TIME is not set -CONFIG_RTC_DRV_MAX8907=m -CONFIG_RTC_HCTOSYS_DEVICE="rtc0" 
-CONFIG_RTC_HCTOSYS=y -CONFIG_RTC_I2C_AND_SPI=m -CONFIG_RT_GROUP_SCHED=y -CONFIG_RTL8180=m -CONFIG_RTL8187_LEDS=y -CONFIG_RTL8187=m -CONFIG_RTL8188EE=m -CONFIG_RTL8192C_COMMON=m -CONFIG_RTL8192CE=m -CONFIG_RTL8192CU=m -CONFIG_RTL8192DE=m -CONFIG_RTL8192EE=m -CONFIG_RTL8192E=m -CONFIG_RTL8192SE=m -CONFIG_RTL8192U=m -CONFIG_RTL8723AE=m -CONFIG_RTL8723BE=m -CONFIG_RTL8723BS=m -CONFIG_RTL8723_COMMON=m -CONFIG_RTL8821AE=m -CONFIG_RTLBTCOEXIST=m -CONFIG_RTL_CARDS=m -CONFIG_RTLLIB_CRYPTO_CCMP=m -CONFIG_RTLLIB_CRYPTO_TKIP=m -CONFIG_RTLLIB_CRYPTO_WEP=m -CONFIG_RTLLIB=m -# CONFIG_RTLWIFI_DEBUG is not set -CONFIG_RTLWIFI=m -CONFIG_RTLWIFI_PCI=m -CONFIG_RTLWIFI_USB=m -CONFIG_RTS5208=m -CONFIG_RTW88_8822BE=y -CONFIG_RTW88_8822CE=y -CONFIG_RTW88_CORE=m -# CONFIG_RTW88_DEBUGFS is not set -# CONFIG_RTW88_DEBUG is not set -CONFIG_RTW88=m -CONFIG_RTW88_PCI=m -CONFIG_S2IO=m -CONFIG_SAMSUNG_LAPTOP=m -CONFIG_SAMSUNG_Q10=m -# CONFIG_SATA_DWC_DEBUG is not set -CONFIG_SATA_DWC=m -# CONFIG_SATA_DWC_OLD_DMA is not set -CONFIG_SATA_INIC162X=m -CONFIG_SATA_MV=m -CONFIG_SATA_NV=m -CONFIG_SATA_PROMISE=m -CONFIG_SATA_QSTOR=m -CONFIG_SATA_SIL24=m -CONFIG_SATA_SIL=m -CONFIG_SATA_SIS=m -CONFIG_SATA_SVW=m -CONFIG_SATA_SX4=m -CONFIG_SATA_ULI=m -CONFIG_SATA_VIA=m -CONFIG_SATA_VITESSE=m -CONFIG_SBC_EPX_C3_WATCHDOG=m -CONFIG_SBC_FITPC2_WATCHDOG=m -CONFIG_SC1200_WDT=m -CONFIG_SC92031=m -CONFIG_SCHED_AUTOGROUP=y -# CONFIG_SCHEDSTATS is not set -CONFIG_SCSI_3W_9XXX=m -CONFIG_SCSI_3W_SAS=m -CONFIG_SCSI_AACRAID=m -CONFIG_SCSI_ACARD=m -CONFIG_SCSI_ADVANSYS=m -CONFIG_SCSI_AIC79XX=m -CONFIG_SCSI_AIC7XXX=m -CONFIG_SCSI_AIC94XX=m -CONFIG_SCSI_AM53C974=m -CONFIG_SCSI_ARCMSR=m -CONFIG_SCSI_BFA_FC=m -CONFIG_SCSI_BNX2_ISCSI=m -CONFIG_SCSI_BNX2X_FCOE=m -CONFIG_SCSI_BUSLOGIC=m -CONFIG_SCSI_CHELSIO_FCOE=m -# CONFIG_SCSI_CONSTANTS is not set -CONFIG_SCSI_CXGB3_ISCSI=m -CONFIG_SCSI_CXGB4_ISCSI=m -CONFIG_SCSI_DC395x=m -CONFIG_SCSI_DEBUG=m -CONFIG_SCSI_DMX3191D=m -CONFIG_SCSI_DPT_I2O=m -# CONFIG_SCSI_ENCLOSURE is not set -CONFIG_SCSI_ESAS2R=m -CONFIG_SCSI_FC_ATTRS=m -CONFIG_SCSI_FDOMAIN=m -CONFIG_SCSI_FDOMAIN_PCI=m -CONFIG_SCSI_FLASHPOINT=y -CONFIG_SCSI_GDTH=m -CONFIG_SCSI_HPSA=m -CONFIG_SCSI_HPTIOP=m -CONFIG_SCSI_IMM=m -CONFIG_SCSI_INIA100=m -CONFIG_SCSI_INITIO=m -CONFIG_SCSI_IPR_DUMP=y -CONFIG_SCSI_IPR=m -CONFIG_SCSI_IPR_TRACE=y -CONFIG_SCSI_IPS=m -CONFIG_SCSI_ISCI=m -CONFIG_SCSI_ISCSI_ATTRS=m -CONFIG_SCSI_IZIP_EPP16=y -CONFIG_SCSI_IZIP_SLOW_CTR=y -CONFIG_SCSI_LOWLEVEL_PCMCIA=y -CONFIG_SCSI_LOWLEVEL=y -# CONFIG_SCSI_LPFC_DEBUG_FS is not set -CONFIG_SCSI_LPFC=m -CONFIG_SCSI_MPT2SAS=m -CONFIG_SCSI_MPT2SAS_MAX_SGE=128 -CONFIG_SCSI_MPT3SAS=m -CONFIG_SCSI_MPT3SAS_MAX_SGE=128 -CONFIG_SCSI_MVSAS_DEBUG=y -CONFIG_SCSI_MVSAS=m -CONFIG_SCSI_MVSAS_TASKLET=y -CONFIG_SCSI_MVUMI=m -CONFIG_SCSI_MYRB=m -CONFIG_SCSI_MYRS=m -CONFIG_SCSI_NETLINK=y -CONFIG_SCSI_PM8001=m -CONFIG_SCSI_PMCRAID=m -CONFIG_SCSI_PPA=m -CONFIG_SCSI_QLA_FC=m -CONFIG_SCSI_QLA_ISCSI=m -CONFIG_SCSI_QLOGIC_1280=m -CONFIG_SCSI_SAS_ATA=y -CONFIG_SCSI_SAS_ATTRS=m -CONFIG_SCSI_SAS_HOST_SMP=y -CONFIG_SCSI_SAS_LIBSAS=m -CONFIG_SCSI_SMARTPQI=m -# CONFIG_SCSI_SNIC_DEBUG_FS is not set -CONFIG_SCSI_SNIC=m -CONFIG_SCSI_SPI_ATTRS=m -CONFIG_SCSI_SRP_ATTRS=m -CONFIG_SCSI_STEX=m -CONFIG_SCSI_SYM53C8XX_2=m -CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 -CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1 -CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 -CONFIG_SCSI_SYM53C8XX_MMIO=y -CONFIG_SCSI_UFS_BSG=y -CONFIG_SCSI_UFS_CDNS_PLATFORM=m -CONFIG_SCSI_UFS_DWC_TC_PCI=m -CONFIG_SCSI_UFS_DWC_TC_PLATFORM=m -CONFIG_SCSI_UFSHCD=m 
-CONFIG_SCSI_UFSHCD_PCI=m -CONFIG_SCSI_UFSHCD_PLATFORM=m -CONFIG_SCSI_VIRTIO=m -CONFIG_SCSI_WD719X=m -CONFIG_SCTP_COOKIE_HMAC_MD5=y -CONFIG_SCTP_COOKIE_HMAC_SHA1=y -# CONFIG_SCTP_DBG_OBJCNT is not set -CONFIG_SCTP_DEFAULT_COOKIE_HMAC_MD5=y -# CONFIG_SCTP_DEFAULT_COOKIE_HMAC_NONE is not set -# CONFIG_SCTP_DEFAULT_COOKIE_HMAC_SHA1 is not set -CONFIG_SDIO_UART=m -CONFIG_SDR_MAX2175=m -# CONFIG_SDR_PLATFORM_DRIVERS is not set -# CONFIG_SECURITY is not set -# CONFIG_SENSIRION_SGP30 is not set -CONFIG_SENSORS_ABITUGURU3=m -CONFIG_SENSORS_ABITUGURU=m -CONFIG_SENSORS_ACPI_POWER=m -CONFIG_SENSORS_AD7414=m -CONFIG_SENSORS_AD7418=m -CONFIG_SENSORS_ADC128D818=m -CONFIG_SENSORS_ADM1021=m -CONFIG_SENSORS_ADM1025=m -CONFIG_SENSORS_ADM1026=m -CONFIG_SENSORS_ADM1029=m -CONFIG_SENSORS_ADM1031=m -CONFIG_SENSORS_ADM1275=m -CONFIG_SENSORS_ADM9240=m -CONFIG_SENSORS_ADS7828=m -CONFIG_SENSORS_ADT7410=m -CONFIG_SENSORS_ADT7411=m -CONFIG_SENSORS_ADT7462=m -CONFIG_SENSORS_ADT7470=m -CONFIG_SENSORS_ADT7475=m -CONFIG_SENSORS_ADT7X10=m -CONFIG_SENSORS_AMC6821=m -CONFIG_SENSORS_APPLESMC=m -CONFIG_SENSORS_AS370=m -CONFIG_SENSORS_ASB100=m -CONFIG_SENSORS_ASC7621=m -CONFIG_SENSORS_ASPEED=m -CONFIG_SENSORS_ATK0110=m -CONFIG_SENSORS_ATXP1=m -CONFIG_SENSORS_CORETEMP=m -CONFIG_SENSORS_DELL_SMM=m -CONFIG_SENSORS_DME1737=m -CONFIG_SENSORS_DS1621=m -CONFIG_SENSORS_DS620=m -CONFIG_SENSORS_EMC1403=m -CONFIG_SENSORS_EMC2103=m -CONFIG_SENSORS_EMC6W201=m -CONFIG_SENSORS_F71805F=m -CONFIG_SENSORS_F71882FG=m -CONFIG_SENSORS_F75375S=m -CONFIG_SENSORS_FAM15H_POWER=m -CONFIG_SENSORS_FSCHMD=m -CONFIG_SENSORS_FTSTEUTATES=m -CONFIG_SENSORS_G760A=m -CONFIG_SENSORS_G762=m -CONFIG_SENSORS_GL518SM=m -CONFIG_SENSORS_GL520SM=m -CONFIG_SENSORS_HDAPS=m -CONFIG_SENSORS_HIH6130=m -# CONFIG_SENSORS_HMC5843_I2C is not set -CONFIG_SENSORS_I5500=m -CONFIG_SENSORS_I5K_AMB=m -# CONFIG_SENSORS_IBMAEM is not set -CONFIG_SENSORS_IBM_CFFPS=m -# CONFIG_SENSORS_IBMPEX is not set -# CONFIG_SENSORS_IIO_HWMON is not set -CONFIG_SENSORS_INA209=m -CONFIG_SENSORS_INA2XX=m -CONFIG_SENSORS_INA3221=m -CONFIG_SENSORS_INSPUR_IPSPS=m -CONFIG_SENSORS_IR35221=m -CONFIG_SENSORS_IR38064=m -CONFIG_SENSORS_IRPS5401=m -# CONFIG_SENSORS_ISL29018 is not set -# CONFIG_SENSORS_ISL29028 is not set -CONFIG_SENSORS_ISL68137=m -CONFIG_SENSORS_IT87=m -CONFIG_SENSORS_K10TEMP=m -CONFIG_SENSORS_K8TEMP=m -CONFIG_SENSORS_LINEAGE=m -CONFIG_SENSORS_LIS3_I2C=m -CONFIG_SENSORS_LIS3LV02D=m -CONFIG_SENSORS_LM25066=m -CONFIG_SENSORS_LM63=m -CONFIG_SENSORS_LM73=m -CONFIG_SENSORS_LM75=m -CONFIG_SENSORS_LM77=m -CONFIG_SENSORS_LM78=m -CONFIG_SENSORS_LM80=m -CONFIG_SENSORS_LM83=m -CONFIG_SENSORS_LM85=m -CONFIG_SENSORS_LM87=m -CONFIG_SENSORS_LM90=m -CONFIG_SENSORS_LM92=m -CONFIG_SENSORS_LM93=m -CONFIG_SENSORS_LM95234=m -CONFIG_SENSORS_LM95241=m -CONFIG_SENSORS_LM95245=m -CONFIG_SENSORS_LTC2945=m -CONFIG_SENSORS_LTC2978=m -CONFIG_SENSORS_LTC2978_REGULATOR=y -CONFIG_SENSORS_LTC2990=m -CONFIG_SENSORS_LTC3815=m -CONFIG_SENSORS_LTC4151=m -CONFIG_SENSORS_LTC4215=m -CONFIG_SENSORS_LTC4222=m -CONFIG_SENSORS_LTC4245=m -CONFIG_SENSORS_LTC4260=m -CONFIG_SENSORS_LTC4261=m -CONFIG_SENSORS_MAX16064=m -CONFIG_SENSORS_MAX16065=m -CONFIG_SENSORS_MAX1619=m -CONFIG_SENSORS_MAX1668=m -CONFIG_SENSORS_MAX197=m -CONFIG_SENSORS_MAX20751=m -CONFIG_SENSORS_MAX31785=m -CONFIG_SENSORS_MAX31790=m -CONFIG_SENSORS_MAX34440=m -CONFIG_SENSORS_MAX6621=m -CONFIG_SENSORS_MAX6639=m -CONFIG_SENSORS_MAX6642=m -CONFIG_SENSORS_MAX6650=m -CONFIG_SENSORS_MAX6697=m -CONFIG_SENSORS_MAX8688=m -CONFIG_SENSORS_MCP3021=m -CONFIG_SENSORS_NCT6683=m 
-CONFIG_SENSORS_NCT6775=m -CONFIG_SENSORS_NCT7802=m -CONFIG_SENSORS_NCT7904=m -CONFIG_SENSORS_NPCM7XX=m -CONFIG_SENSORS_NTC_THERMISTOR=m -CONFIG_SENSORS_PC87360=m -CONFIG_SENSORS_PC87427=m -CONFIG_SENSORS_PCF8591=m -CONFIG_SENSORS_PMBUS=m -CONFIG_SENSORS_POWR1220=m -CONFIG_SENSORS_PXE1610=m -# CONFIG_SENSORS_RM3100_I2C is not set -CONFIG_SENSORS_SCH5627=m -CONFIG_SENSORS_SCH5636=m -CONFIG_SENSORS_SCH56XX_COMMON=m -CONFIG_SENSORS_SHT21=m -CONFIG_SENSORS_SHT3x=m -CONFIG_SENSORS_SHTC1=m -CONFIG_SENSORS_SIS5595=m -CONFIG_SENSORS_SMM665=m -CONFIG_SENSORS_SMSC47B397=m -CONFIG_SENSORS_SMSC47M192=m -CONFIG_SENSORS_SMSC47M1=m -CONFIG_SENSORS_STTS751=m -CONFIG_SENSORS_TC654=m -CONFIG_SENSORS_TC74=m -CONFIG_SENSORS_THMC50=m -CONFIG_SENSORS_TMP102=m -CONFIG_SENSORS_TMP103=m -CONFIG_SENSORS_TMP108=m -CONFIG_SENSORS_TMP401=m -CONFIG_SENSORS_TMP421=m -CONFIG_SENSORS_TPS40422=m -CONFIG_SENSORS_TPS53679=m -# CONFIG_SENSORS_TSL2563 is not set -CONFIG_SENSORS_UCD9000=m -CONFIG_SENSORS_UCD9200=m -CONFIG_SENSORS_VIA686A=m -CONFIG_SENSORS_VIA_CPUTEMP=m -CONFIG_SENSORS_VT1211=m -CONFIG_SENSORS_VT8231=m -CONFIG_SENSORS_W83627EHF=m -CONFIG_SENSORS_W83627HF=m -CONFIG_SENSORS_W83773G=m -CONFIG_SENSORS_W83781D=m -CONFIG_SENSORS_W83791D=m -CONFIG_SENSORS_W83792D=m -CONFIG_SENSORS_W83793=m -CONFIG_SENSORS_W83795_FANCTRL=y -CONFIG_SENSORS_W83795=m -CONFIG_SENSORS_W83L785TS=m -CONFIG_SENSORS_W83L786NG=m -CONFIG_SENSORS_XGENE=m -CONFIG_SENSORS_ZL6100=m -# CONFIG_SERIAL_8250_CONSOLE is not set -CONFIG_SERIAL_8250_CS=m -# CONFIG_SERIAL_8250_EXTENDED is not set -CONFIG_SERIAL_8250_NR_UARTS=4 -CONFIG_SERIAL_DEV_BUS=m -# CONFIG_SERIO_PARKBD is not set -CONFIG_SERIO_SERPORT=m -CONFIG_SFC_FALCON=m -CONFIG_SFC=m -CONFIG_SFC_MCDI_LOGGING=y -CONFIG_SFC_MCDI_MON=y -CONFIG_SFC_SRIOV=y -CONFIG_SFI=y -CONFIG_SFP=m -# CONFIG_SGI_PARTITION is not set -CONFIG_SHUFFLE_PAGE_ALLOCATOR=y -# CONFIG_SI1133 is not set -# CONFIG_SI1145 is not set -# CONFIG_SI7005 is not set -# CONFIG_SI7020 is not set -CONFIG_SIS190=m -CONFIG_SIS900=m -# CONFIG_SKGE_DEBUG is not set -CONFIG_SKGE_GENESIS=y -CONFIG_SKGE=m -CONFIG_SKY2=m -CONFIG_SLAB_FREELIST_RANDOM=y -CONFIG_SLHC=y -# CONFIG_SLIP_COMPRESSED is not set -CONFIG_SLIP=m -# CONFIG_SLIP_MODE_SLIP6 is not set -# CONFIG_SLIP_SMART is not set -# CONFIG_SLOB is not set -CONFIG_SLUB_MEMCG_SYSFS_ON=y -CONFIG_SMARTJOYPLUS_FF=y -CONFIG_SMSC37B787_WDT=m -CONFIG_SMSC911X=m -CONFIG_SMSC9420=m -CONFIG_SMSC_PHY=m -CONFIG_SMSC_SCH311X_WDT=m -CONFIG_SMS_SDIO_DRV=m -# CONFIG_SMS_SIANO_DEBUGFS is not set -CONFIG_SMS_SIANO_MDTV=m -CONFIG_SMS_SIANO_RC=y -CONFIG_SMS_USB_DRV=m -CONFIG_SND_AC97_CODEC=m -CONFIG_SND_AC97_POWER_SAVE_DEFAULT=0 -CONFIG_SND_AC97_POWER_SAVE=y -CONFIG_SND_AD1889=m -CONFIG_SND_ALI5451=m -CONFIG_SND_ALOOP=m -CONFIG_SND_ALS300=m -CONFIG_SND_ALS4000=m -CONFIG_SND_ASIHPI=m -CONFIG_SND_ATIIXP=m -CONFIG_SND_ATIIXP_MODEM=m -CONFIG_SND_AU8810=m -CONFIG_SND_AU8820=m -CONFIG_SND_AU8830=m -CONFIG_SND_AW2=m -CONFIG_SND_AZT3328=m -CONFIG_SND_BCD2000=m -CONFIG_SND_BEBOB=m -CONFIG_SND_BT87X=m -CONFIG_SND_BT87X_OVERCLOCK=y -CONFIG_SND_CA0106=m -CONFIG_SND_CMIPCI=m -CONFIG_SND_CS4281=m -CONFIG_SND_CS46XX=m -CONFIG_SND_CS46XX_NEW_DSP=y -CONFIG_SND_CTXFI=m -CONFIG_SND_DARLA20=m -CONFIG_SND_DARLA24=m -CONFIG_SND_DICE=m -CONFIG_SND_DYNAMIC_MINORS=y -CONFIG_SND_ECHO3G=m -CONFIG_SND_EMU10K1=m -CONFIG_SND_EMU10K1_SEQ=m -CONFIG_SND_EMU10K1X=m -CONFIG_SND_ENS1370=m -CONFIG_SND_ENS1371=m -CONFIG_SND_ES1938=m -CONFIG_SND_ES1968_INPUT=y -CONFIG_SND_ES1968=m -CONFIG_SND_FIREFACE=m -CONFIG_SND_FIREWIRE_DIGI00X=m 
-CONFIG_SND_FIREWIRE_LIB=m -CONFIG_SND_FIREWIRE_MOTU=m -CONFIG_SND_FIREWIRE_TASCAM=m -CONFIG_SND_FIREWIRE=y -CONFIG_SND_FIREWORKS=m -CONFIG_SND_FM801=m -CONFIG_SND_GINA20=m -CONFIG_SND_GINA24=m -CONFIG_SND_HDA_CODEC_ANALOG=m -CONFIG_SND_HDA_CODEC_CA0110=m -CONFIG_SND_HDA_CODEC_CA0132_DSP=y -CONFIG_SND_HDA_CODEC_CA0132=m -CONFIG_SND_HDA_CODEC_CIRRUS=m -CONFIG_SND_HDA_CODEC_CMEDIA=m -CONFIG_SND_HDA_CODEC_CONEXANT=m -CONFIG_SND_HDA_CODEC_HDMI=m -CONFIG_SND_HDA_CODEC_REALTEK=m -CONFIG_SND_HDA_CODEC_SI3054=m -CONFIG_SND_HDA_CODEC_SIGMATEL=m -CONFIG_SND_HDA_CODEC_VIA=m -CONFIG_SND_HDA_CORE=m -CONFIG_SND_HDA_DSP_LOADER=y -CONFIG_SND_HDA_GENERIC=m -CONFIG_SND_HDA_INPUT_BEEP_MODE=1 -CONFIG_SND_HDA_INPUT_BEEP=y -CONFIG_SND_HDA_INTEL_DETECT_DMIC=y -CONFIG_SND_HDA_INTEL=m -CONFIG_SND_HDA=m -CONFIG_SND_HDA_PATCH_LOADER=y -CONFIG_SND_HDA_PREALLOC_SIZE=4096 -CONFIG_SND_HDA_RECONFIG=y -CONFIG_SND_HDSP=m -CONFIG_SND_HDSPM=m -# CONFIG_SND_HRTIMER is not set -CONFIG_SND_HWDEP=m -CONFIG_SND_ICE1712=m -CONFIG_SND_ICE1724=m -CONFIG_SND_INDIGODJ=m -CONFIG_SND_INDIGODJX=m -CONFIG_SND_INDIGOIO=m -CONFIG_SND_INDIGOIOX=m -CONFIG_SND_INDIGO=m -CONFIG_SND_INTEL8X0=m -CONFIG_SND_INTEL8X0M=m -CONFIG_SND_INTEL_NHLT=m -CONFIG_SND_ISIGHT=m -CONFIG_SND_KORG1212=m -CONFIG_SND_LAYLA20=m -CONFIG_SND_LAYLA24=m -CONFIG_SND_LOLA=m -CONFIG_SND_LX6464ES=m -CONFIG_SND=m -CONFIG_SND_MAESTRO3_INPUT=y -CONFIG_SND_MAESTRO3=m -CONFIG_SND_MAX_CARDS=32 -CONFIG_SND_MIA=m -CONFIG_SND_MIXART=m -CONFIG_SND_MIXER_OSS=m -CONFIG_SND_MONA=m -CONFIG_SND_MPU401_UART=m -# CONFIG_SND_MTS64 is not set -CONFIG_SND_NM256=m -CONFIG_SND_OPL3_LIB=m -CONFIG_SND_OPL3_LIB_SEQ=m -CONFIG_SND_OSSEMUL=y -CONFIG_SND_OXFW=m -CONFIG_SND_OXYGEN_LIB=m -CONFIG_SND_OXYGEN=m -CONFIG_SND_PCM=m -CONFIG_SND_PCM_OSS=m -CONFIG_SND_PCM_OSS_PLUGINS=y -CONFIG_SND_PCXHR=m -# CONFIG_SND_PORTMAN2X4 is not set -CONFIG_SND_RAWMIDI=m -CONFIG_SND_RIPTIDE=m -CONFIG_SND_RME32=m -CONFIG_SND_RME9652=m -CONFIG_SND_RME96=m -CONFIG_SND_SB_COMMON=m -CONFIG_SND_SEQ_DEVICE=m -# CONFIG_SND_SEQ_DUMMY is not set -CONFIG_SND_SEQ_MIDI_EMUL=m -CONFIG_SND_SEQ_MIDI_EVENT=m -CONFIG_SND_SEQ_MIDI=m -CONFIG_SND_SEQUENCER=m -# CONFIG_SND_SEQUENCER_OSS is not set -CONFIG_SND_SEQ_VIRMIDI=m -CONFIG_SND_SONICVIBES=m -# CONFIG_SND_SUPPORT_OLD_API is not set -CONFIG_SND_SYNTH_EMUX=m -CONFIG_SND_TIMER=m -CONFIG_SND_TRIDENT=m -CONFIG_SND_USB_6FIRE=m -CONFIG_SND_USB_AUDIO=m -CONFIG_SND_USB_AUDIO_USE_MEDIA_CONTROLLER=y -CONFIG_SND_USB_CAIAQ_INPUT=y -CONFIG_SND_USB_CAIAQ=m -CONFIG_SND_USB_HIFACE=m -CONFIG_SND_USB_LINE6=m -CONFIG_SND_USB_PODHD=m -CONFIG_SND_USB_POD=m -CONFIG_SND_USB_TONEPORT=m -CONFIG_SND_USB_UA101=m -CONFIG_SND_USB_US122L=m -CONFIG_SND_USB_USX2Y=m -CONFIG_SND_USB_VARIAX=m -CONFIG_SND_VIA82XX=m -CONFIG_SND_VIA82XX_MODEM=m -CONFIG_SND_VIRTUOSO=m -CONFIG_SND_VX222=m -CONFIG_SND_VX_LIB=m -CONFIG_SND_YMFPCI=m -CONFIG_SOCK_CGROUP_DATA=y -CONFIG_SOCK_VALIDATE_XMIT=y -CONFIG_SOFT_WATCHDOG=m -# CONFIG_SOLARIS_X86_PARTITION is not set -CONFIG_SONY_LAPTOP=m -CONFIG_SONYPI_COMPAT=y -# CONFIG_SOUND_OSS_CORE_PRECLAIM is not set -CONFIG_SOUND_OSS_CORE=y -CONFIG_SP5100_TCO=m -# CONFIG_SPEAKUP is not set -CONFIG_SPMI=m -# CONFIG_SPS30 is not set -# CONFIG_SQUASHFS_4K_DEVBLK_SIZE is not set -# CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU is not set -CONFIG_SQUASHFS_DECOMP_MULTI=y -# CONFIG_SQUASHFS_DECOMP_SINGLE is not set -# CONFIG_SQUASHFS_EMBEDDED is not set -# CONFIG_SQUASHFS_FILE_CACHE is not set -CONFIG_SQUASHFS_FILE_DIRECT=y -CONFIG_SQUASHFS_FRAGMENT_CACHE_SIZE=3 -CONFIG_SQUASHFS_LZ4=y -CONFIG_SQUASHFS_LZO=y 
-CONFIG_SQUASHFS=m -CONFIG_SQUASHFS_XATTR=y -CONFIG_SQUASHFS_XZ=y -CONFIG_SQUASHFS_ZLIB=y -CONFIG_SQUASHFS_ZSTD=y -# CONFIG_SRF08 is not set -CONFIG_SSB_B43_PCI_BRIDGE=y -CONFIG_SSB_BLOCKIO=y -CONFIG_SSB_DRIVER_PCICORE_POSSIBLE=y -CONFIG_SSB_DRIVER_PCICORE=y -CONFIG_SSB=m -CONFIG_SSB_PCIHOST_POSSIBLE=y -CONFIG_SSB_PCIHOST=y -CONFIG_SSB_PCMCIAHOST_POSSIBLE=y -CONFIG_SSB_PCMCIAHOST=y -CONFIG_SSB_SDIOHOST_POSSIBLE=y -CONFIG_SSB_SDIOHOST=y -CONFIG_SSB_SPROM=y -# CONFIG_STACKTRACE is not set -# CONFIG_STAGING_GASKET_FRAMEWORK is not set -# CONFIG_STAGING_MEDIA is not set -CONFIG_STAGING=y -CONFIG_STE10XP=m -CONFIG_STK3310=m -# CONFIG_STK8312 is not set -# CONFIG_STK8BA50 is not set -CONFIG_STMMAC_ETH=m -CONFIG_STMMAC_PCI=m -CONFIG_STMMAC_PLATFORM=m -CONFIG_STMMAC_SELFTESTS=y -CONFIG_STP=m -CONFIG_STREAM_PARSER=y -# CONFIG_STRICT_DEVMEM is not set -CONFIG_ST_UVIS25_I2C=m -CONFIG_ST_UVIS25=m -CONFIG_SUNDANCE=m -CONFIG_SUNDANCE_MMIO=y -CONFIG_SUNGEM=m -CONFIG_SUNGEM_PHY=m -# CONFIG_SUN_PARTITION is not set -# CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES is not set -CONFIG_SUNRPC_GSS=m -CONFIG_SUNRPC=m -CONFIG_SURFACE_PRO3_BUTTON=m -# CONFIG_SUSPEND_SKIP_SYNC is not set -CONFIG_SWPHY=y -# CONFIG_SX9500 is not set -CONFIG_SXGBE_ETH=m -# CONFIG_SYSCTL_SYSCALL is not set -CONFIG_SYSTEMPORT=m -# CONFIG_SYSV68_PARTITION is not set -# CONFIG_T5403 is not set -CONFIG_TABLET_SERIAL_WACOM4=m -CONFIG_TABLET_USB_ACECAD=m -CONFIG_TABLET_USB_AIPTEK=m -CONFIG_TABLET_USB_GTCO=m -CONFIG_TABLET_USB_KBTAB=m -CONFIG_TABLET_USB_PEGASUS=m -CONFIG_TAHVO_USB_HOST_BY_DEFAULT=y -CONFIG_TAHVO_USB=m -CONFIG_TAP=m -CONFIG_TASKS_RCU=y -CONFIG_TCP_CONG_BBR=m -CONFIG_TCP_CONG_BIC=m -CONFIG_TCP_CONG_CDG=m -CONFIG_TCP_CONG_DCTCP=m -CONFIG_TCP_CONG_HSTCP=m -CONFIG_TCP_CONG_HTCP=m -CONFIG_TCP_CONG_HYBLA=m -CONFIG_TCP_CONG_ILLINOIS=m -CONFIG_TCP_CONG_LP=m -CONFIG_TCP_CONG_NV=m -CONFIG_TCP_CONG_SCALABLE=m -CONFIG_TCP_CONG_VEGAS=m -CONFIG_TCP_CONG_VENO=m -CONFIG_TCP_CONG_WESTWOOD=m -CONFIG_TCP_CONG_YEAH=m -# CONFIG_TCP_MD5SIG is not set -# CONFIG_TCS3414 is not set -# CONFIG_TCS3472 is not set -CONFIG_TEHUTI=m -CONFIG_TERANETICS_PHY=m -# CONFIG_TEST_OBJAGG is not set -# CONFIG_TEST_PARMAN is not set -CONFIG_TEXTSEARCH_BM=m -CONFIG_TEXTSEARCH_FSM=m -CONFIG_TEXTSEARCH_KMP=m -CONFIG_TEXTSEARCH=y -CONFIG_THERMAL_GOV_BANG_BANG=y -CONFIG_THERMAL_GOV_POWER_ALLOCATOR=y -CONFIG_THINKPAD_ACPI_ALSA_SUPPORT=y -# CONFIG_THINKPAD_ACPI_DEBUGFACILITIES is not set -# CONFIG_THINKPAD_ACPI_DEBUG is not set -CONFIG_THINKPAD_ACPI_HOTKEY_POLL=y -CONFIG_THINKPAD_ACPI=m -CONFIG_THINKPAD_ACPI_UNSAFE_LEDS=y -CONFIG_THINKPAD_ACPI_VIDEO=y -CONFIG_THRUSTMASTER_FF=y -CONFIG_THUNDER_NIC_BGX=m -CONFIG_THUNDER_NIC_PF=m -CONFIG_THUNDER_NIC_RGX=m -CONFIG_THUNDER_NIC_VF=m -# CONFIG_TI_ADC081C is not set -# CONFIG_TI_ADS1015 is not set -# CONFIG_TI_DAC5571 is not set -CONFIG_TIFM_7XX1=m -CONFIG_TIFM_CORE=m -CONFIG_TIGON3=m -CONFIG_TLAN=m -CONFIG_TLS_DEVICE=y -CONFIG_TLS=m -CONFIG_TMD_HERMES=m -# CONFIG_TMP006 is not set -# CONFIG_TMP007 is not set -CONFIG_TOPSTAR_LAPTOP=m -CONFIG_TOSHIBA_BT_RFKILL=m -CONFIG_TOSHIBA_HAPS=m -CONFIG_TOSHIBA_WMI=m -CONFIG_TOUCHSCREEN_AD7879_I2C=m -CONFIG_TOUCHSCREEN_AD7879=m -CONFIG_TOUCHSCREEN_ADC=m -CONFIG_TOUCHSCREEN_ATMEL_MXT=m -CONFIG_TOUCHSCREEN_ATMEL_MXT_T37=y -CONFIG_TOUCHSCREEN_BU21013=m -CONFIG_TOUCHSCREEN_BU21029=m -CONFIG_TOUCHSCREEN_CHIPONE_ICN8505=m -CONFIG_TOUCHSCREEN_CYTTSP4_CORE=m -CONFIG_TOUCHSCREEN_CYTTSP4_I2C=m -CONFIG_TOUCHSCREEN_CYTTSP_CORE=m -CONFIG_TOUCHSCREEN_CYTTSP_I2C=m -CONFIG_TOUCHSCREEN_DYNAPRO=m 
-CONFIG_TOUCHSCREEN_EDT_FT5X06=m -CONFIG_TOUCHSCREEN_EETI=m -CONFIG_TOUCHSCREEN_EGALAX_SERIAL=m -CONFIG_TOUCHSCREEN_EKTF2127=m -CONFIG_TOUCHSCREEN_ELAN=m -CONFIG_TOUCHSCREEN_ELO=m -CONFIG_TOUCHSCREEN_EXC3000=m -CONFIG_TOUCHSCREEN_FUJITSU=m -CONFIG_TOUCHSCREEN_GUNZE=m -CONFIG_TOUCHSCREEN_HAMPSHIRE=m -CONFIG_TOUCHSCREEN_ILI210X=m -CONFIG_TOUCHSCREEN_INEXIO=m -CONFIG_TOUCHSCREEN_IQS5XX=m -CONFIG_TOUCHSCREEN_MAX11801=m -CONFIG_TOUCHSCREEN_MCS5000=m -CONFIG_TOUCHSCREEN_MELFAS_MIP4=m -CONFIG_TOUCHSCREEN_MK712=m -CONFIG_TOUCHSCREEN_MMS114=m -CONFIG_TOUCHSCREEN_MTOUCH=m -CONFIG_TOUCHSCREEN_PENMOUNT=m -CONFIG_TOUCHSCREEN_PIXCIR=m -CONFIG_TOUCHSCREEN_ROHM_BU21023=m -CONFIG_TOUCHSCREEN_S6SY761=m -CONFIG_TOUCHSCREEN_SILEAD=m -CONFIG_TOUCHSCREEN_ST1232=m -CONFIG_TOUCHSCREEN_STMFTS=m -CONFIG_TOUCHSCREEN_SUR40=m -CONFIG_TOUCHSCREEN_SX8654=m -CONFIG_TOUCHSCREEN_TOUCHIT213=m -CONFIG_TOUCHSCREEN_TOUCHRIGHT=m -CONFIG_TOUCHSCREEN_TOUCHWIN=m -CONFIG_TOUCHSCREEN_TPS6507X=m -CONFIG_TOUCHSCREEN_TSC2004=m -CONFIG_TOUCHSCREEN_TSC2007_IIO=y -CONFIG_TOUCHSCREEN_TSC2007=m -CONFIG_TOUCHSCREEN_TSC200X_CORE=m -CONFIG_TOUCHSCREEN_TSC_SERIO=m -CONFIG_TOUCHSCREEN_USB_3M=y -CONFIG_TOUCHSCREEN_USB_COMPOSITE=m -CONFIG_TOUCHSCREEN_USB_DMC_TSC10=y -CONFIG_TOUCHSCREEN_USB_E2I=y -CONFIG_TOUCHSCREEN_USB_EASYTOUCH=y -CONFIG_TOUCHSCREEN_USB_EGALAX=y -CONFIG_TOUCHSCREEN_USB_ELO=y -CONFIG_TOUCHSCREEN_USB_ETT_TC45USB=y -CONFIG_TOUCHSCREEN_USB_ETURBO=y -CONFIG_TOUCHSCREEN_USB_GENERAL_TOUCH=y -CONFIG_TOUCHSCREEN_USB_GOTOP=y -CONFIG_TOUCHSCREEN_USB_GUNZE=y -CONFIG_TOUCHSCREEN_USB_IDEALTEK=y -CONFIG_TOUCHSCREEN_USB_IRTOUCH=y -CONFIG_TOUCHSCREEN_USB_ITM=y -CONFIG_TOUCHSCREEN_USB_JASTEC=y -CONFIG_TOUCHSCREEN_USB_NEXIO=y -CONFIG_TOUCHSCREEN_USB_PANJIT=y -CONFIG_TOUCHSCREEN_USB_ZYTRONIC=y -CONFIG_TOUCHSCREEN_WACOM_I2C=m -CONFIG_TOUCHSCREEN_WACOM_W8001=m -CONFIG_TOUCHSCREEN_WDT87XX_I2C=m -CONFIG_TOUCHSCREEN_WM9705=y -CONFIG_TOUCHSCREEN_WM9712=y -CONFIG_TOUCHSCREEN_WM9713=y -CONFIG_TOUCHSCREEN_WM97XX=m -CONFIG_TOUCHSCREEN_ZET6223=m -# CONFIG_TPL0102 is not set -CONFIG_TQMX86_WDT=m -# CONFIG_TSL2583 is not set -# CONFIG_TSL2772 is not set -# CONFIG_TSL4531 is not set -# CONFIG_TSYS01 is not set -# CONFIG_TSYS02D is not set -CONFIG_TTPCI_EEPROM=m -# CONFIG_TTY_PRINTK is not set -CONFIG_TULIP=m -CONFIG_TULIP_MMIO=y -CONFIG_TULIP_MWI=y -CONFIG_TULIP_NAPI_HW_MITIGATION=y -CONFIG_TULIP_NAPI=y -CONFIG_TUN=m -CONFIG_TUN_VNET_CROSS_LE=y -CONFIG_TYPEC_DP_ALTMODE=m -CONFIG_TYPEC_FUSB302=m -CONFIG_TYPEC=m -CONFIG_TYPEC_MUX_PI3USB30532=m -CONFIG_TYPEC_NVIDIA_ALTMODE=m -CONFIG_TYPEC_RT1711H=m -CONFIG_TYPEC_TCPCI=m -CONFIG_TYPEC_TCPM=m -CONFIG_TYPEC_TPS6598X=m -CONFIG_TYPEC_UCSI=m -CONFIG_TYPHOON=m -# CONFIG_UCLAMP_TASK is not set -CONFIG_UCSI_ACPI=m -CONFIG_UCSI_CCG=m -CONFIG_UDF_FS=m -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -CONFIG_UEVENT_HELPER=y -CONFIG_UHID=m -# CONFIG_UIO_AEC is not set -# CONFIG_UIO_CIF is not set -# CONFIG_UIO_DMEM_GENIRQ is not set -# CONFIG_UIO_HV_GENERIC is not set -CONFIG_UIO=m -# CONFIG_UIO_MF624 is not set -# CONFIG_UIO_NETX is not set -CONFIG_UIO_PCI_GENERIC=m -# CONFIG_UIO_PDRV_GENIRQ is not set -# CONFIG_UIO_PRUSS is not set -# CONFIG_UIO_SERCOS3 is not set -CONFIG_ULI526X=m -# CONFIG_ULTRIX_PARTITION is not set -CONFIG_UNICODE_NORMALIZATION_SELFTEST=m -CONFIG_UNICODE=y -CONFIG_UNINLINE_SPIN_UNLOCK=y -# CONFIG_UNISYSSPAR is not set -CONFIG_UNIX_DIAG=m -# CONFIG_UNIXWARE_DISKLABEL is not set -# CONFIG_UNWINDER_GUESS is not set -# CONFIG_US5182D is not set -CONFIG_USB_ACM=m -CONFIG_USB_AIRSPY=m -CONFIG_USB_ALI_M5632=y 
-CONFIG_USB_AMD5536UDC=m -CONFIG_USB_AN2720=y -CONFIG_USB_ARMLINUX=y -# CONFIG_USB_AUDIO is not set -CONFIG_USB_BDC_PCI=m -CONFIG_USB_BDC_UDC=m -CONFIG_USB_BELKIN=y -CONFIG_USB_CATC=m -# CONFIG_USB_CDC_COMPOSITE is not set -CONFIG_USB_CDNS3_GADGET=y -CONFIG_USB_CDNS3_HOST=y -CONFIG_USB_CDNS3=m -CONFIG_USB_CDNS3_PCI_WRAP=m -CONFIG_USB_CHAOSKEY=m -CONFIG_USB_CHIPIDEA_HOST=y -CONFIG_USB_CHIPIDEA=m -CONFIG_USB_CHIPIDEA_UDC=y -# CONFIG_USB_CONFIGFS_ACM is not set -# CONFIG_USB_CONFIGFS_ECM is not set -# CONFIG_USB_CONFIGFS_ECM_SUBSET is not set -# CONFIG_USB_CONFIGFS_EEM is not set -# CONFIG_USB_CONFIGFS_F_FS is not set -# CONFIG_USB_CONFIGFS_F_HID is not set -# CONFIG_USB_CONFIGFS_F_LB_SS is not set -# CONFIG_USB_CONFIGFS_F_MIDI is not set -CONFIG_USB_CONFIGFS_F_PRINTER=y -# CONFIG_USB_CONFIGFS_F_UAC1 is not set -CONFIG_USB_CONFIGFS_F_UAC1_LEGACY=y -# CONFIG_USB_CONFIGFS_F_UAC2 is not set -# CONFIG_USB_CONFIGFS_F_UVC is not set -CONFIG_USB_CONFIGFS=m -# CONFIG_USB_CONFIGFS_MASS_STORAGE is not set -# CONFIG_USB_CONFIGFS_NCM is not set -# CONFIG_USB_CONFIGFS_OBEX is not set -# CONFIG_USB_CONFIGFS_RNDIS is not set -# CONFIG_USB_CONFIGFS_SERIAL is not set -CONFIG_USB_DUMMY_HCD=m -CONFIG_USB_EG20T=m -CONFIG_USB_EHCI_FSL=m -CONFIG_USB_EHCI_HCD_PLATFORM=y -CONFIG_USB_EHCI_ROOT_HUB_TT=y -CONFIG_USB_EPSON2888=y -CONFIG_USB_ETH_EEM=y -CONFIG_USB_ETH=m -CONFIG_USB_ETH_RNDIS=y -CONFIG_USB_EZUSB_FX2=m -CONFIG_USB_F_ECM=m -CONFIG_USB_F_EEM=m -CONFIG_USB_F_NCM=m -# CONFIG_USB_FOTG210_UDC is not set -CONFIG_USB_F_PRINTER=m -CONFIG_USB_F_RNDIS=m -CONFIG_USB_F_SUBSET=m -CONFIG_USB_F_UAC1_LEGACY=m -# CONFIG_USB_FUNCTIONFS is not set -# CONFIG_USB_G_ACM_MS is not set -# CONFIG_USB_GADGET_DEBUG_FILES is not set -# CONFIG_USB_GADGET_DEBUG_FS is not set -# CONFIG_USB_GADGET_DEBUG is not set -# CONFIG_USB_GADGETFS is not set -CONFIG_USB_GADGET=m -CONFIG_USB_GADGET_STORAGE_NUM_BUFFERS=2 -CONFIG_USB_GADGET_VBUS_DRAW=2 -# CONFIG_USB_G_DBGP is not set -# CONFIG_USB_G_HID is not set -CONFIG_USB_GL860=m -# CONFIG_USB_G_MULTI is not set -CONFIG_USB_G_NCM=m -CONFIG_USB_GOKU=m -# CONFIG_USB_G_PRINTER is not set -CONFIG_USB_GR_UDC=m -# CONFIG_USB_G_SERIAL is not set -CONFIG_USB_GSPCA_BENQ=m -CONFIG_USB_GSPCA_CONEX=m -CONFIG_USB_GSPCA_CPIA1=m -CONFIG_USB_GSPCA_DTCS033=m -CONFIG_USB_GSPCA_ETOMS=m -CONFIG_USB_GSPCA_FINEPIX=m -CONFIG_USB_GSPCA_JEILINJ=m -CONFIG_USB_GSPCA_JL2005BCD=m -CONFIG_USB_GSPCA_KINECT=m -CONFIG_USB_GSPCA_KONICA=m -CONFIG_USB_GSPCA=m -CONFIG_USB_GSPCA_MARS=m -CONFIG_USB_GSPCA_MR97310A=m -CONFIG_USB_GSPCA_NW80X=m -CONFIG_USB_GSPCA_OV519=m -CONFIG_USB_GSPCA_OV534_9=m -CONFIG_USB_GSPCA_OV534=m -CONFIG_USB_GSPCA_PAC207=m -CONFIG_USB_GSPCA_PAC7302=m -CONFIG_USB_GSPCA_PAC7311=m -CONFIG_USB_GSPCA_SE401=m -CONFIG_USB_GSPCA_SN9C2028=m -CONFIG_USB_GSPCA_SN9C20X=m -CONFIG_USB_GSPCA_SONIXB=m -CONFIG_USB_GSPCA_SONIXJ=m -CONFIG_USB_GSPCA_SPCA1528=m -CONFIG_USB_GSPCA_SPCA500=m -CONFIG_USB_GSPCA_SPCA501=m -CONFIG_USB_GSPCA_SPCA505=m -CONFIG_USB_GSPCA_SPCA506=m -CONFIG_USB_GSPCA_SPCA508=m -CONFIG_USB_GSPCA_SPCA561=m -CONFIG_USB_GSPCA_SQ905C=m -CONFIG_USB_GSPCA_SQ905=m -CONFIG_USB_GSPCA_SQ930X=m -CONFIG_USB_GSPCA_STK014=m -CONFIG_USB_GSPCA_STK1135=m -CONFIG_USB_GSPCA_STV0680=m -CONFIG_USB_GSPCA_SUNPLUS=m -CONFIG_USB_GSPCA_T613=m -CONFIG_USB_GSPCA_TOPRO=m -CONFIG_USB_GSPCA_TOUPTEK=m -CONFIG_USB_GSPCA_TV8532=m -CONFIG_USB_GSPCA_VC032X=m -CONFIG_USB_GSPCA_VICAM=m -CONFIG_USB_GSPCA_XIRLINK_CIT=m -CONFIG_USB_GSPCA_ZC3XX=m -# CONFIG_USB_G_WEBCAM is not set -CONFIG_USB_HACKRF=m -CONFIG_USB_HCD_BCMA=m -CONFIG_USB_HCD_SSB=m 
-CONFIG_USB_HSIC_USB4604=m -CONFIG_USB_HSO=m -CONFIG_USB_HUB_USB251XB=m -CONFIG_USBIP_CORE=m -# CONFIG_USBIP_DEBUG is not set -CONFIG_USB_IPHETH=m -CONFIG_USBIP_HOST=m -CONFIG_USBIP_VHCI_HCD=m -CONFIG_USBIP_VHCI_HC_PORTS=8 -CONFIG_USBIP_VHCI_NR_HCS=1 -CONFIG_USBIP_VUDC=m -CONFIG_USB_ISP116X_HCD=m -CONFIG_USB_KAWETH=m -CONFIG_USB_KC2190=y -CONFIG_USB_LAN78XX=m -CONFIG_USB_LEDS_TRIGGER_USBPORT=m -CONFIG_USB_LED_TRIG=y -CONFIG_USB_LIBCOMPOSITE=m -CONFIG_USB_LINK_LAYER_TEST=m -CONFIG_USB_M5602=m -CONFIG_USB_M66592=m -# CONFIG_USB_MASS_STORAGE is not set -# CONFIG_USB_MIDI_GADGET is not set -CONFIG_USB_MUSB_DUAL_ROLE=y -# CONFIG_USB_MUSB_GADGET is not set -CONFIG_USB_MUSB_HDRC=m -# CONFIG_USB_MUSB_HOST is not set -CONFIG_USB_MV_U3D=m -CONFIG_USB_MV_UDC=m -CONFIG_USB_NET2272_DMA=y -CONFIG_USB_NET2272=m -CONFIG_USB_NET2280=m -CONFIG_USB_NET_AQC111=m -CONFIG_USB_NET_AX88179_178A=m -CONFIG_USB_NET_AX8817X=m -CONFIG_USB_NET_CDC_EEM=m -CONFIG_USB_NET_CDCETHER=m -CONFIG_USB_NET_CDC_MBIM=m -CONFIG_USB_NET_CDC_NCM=m -CONFIG_USB_NET_CDC_SUBSET_ENABLE=m -CONFIG_USB_NET_CDC_SUBSET=m -CONFIG_USB_NET_CH9200=m -# CONFIG_USB_NET_CX82310_ETH is not set -CONFIG_USB_NET_DM9601=m -CONFIG_USB_NET_GL620A=m -CONFIG_USB_NET_HUAWEI_CDC_NCM=m -CONFIG_USB_NET_INT51X1=m -CONFIG_USB_NET_KALMIA=m -CONFIG_USB_NET_MCS7830=m -CONFIG_USB_NET_NET1080=m -CONFIG_USB_NET_PLUSB=m -CONFIG_USB_NET_QMI_WWAN=m -CONFIG_USB_NET_RNDIS_HOST=m -CONFIG_USB_NET_RNDIS_WLAN=m -CONFIG_USB_NET_SMSC75XX=m -CONFIG_USB_NET_SMSC95XX=m -CONFIG_USB_NET_SR9700=m -CONFIG_USB_NET_SR9800=m -CONFIG_USB_NET_ZAURUS=m -CONFIG_USB_OHCI_HCD=m -CONFIG_USB_OHCI_HCD_PCI=m -CONFIG_USB_OHCI_HCD_PLATFORM=m -# CONFIG_USB_OHCI_HCD_SSB is not set -# CONFIG_USB_OTG_BLACKLIST_HUB is not set -CONFIG_USB_OTG_FSM=m -CONFIG_USB_OTG_WHITELIST=y -CONFIG_USB_OTG=y -CONFIG_USBPCWATCHDOG=m -CONFIG_USB_PEGASUS=m -CONFIG_USB_PHY=y -# CONFIG_USB_PRINTER is not set -# CONFIG_USB_PWC_DEBUG is not set -CONFIG_USB_PWC_INPUT_EVDEV=y -CONFIG_USB_PWC=m -CONFIG_USB_PXA27X=m -CONFIG_USB_R8A66597=m -CONFIG_USB_ROLES_INTEL_XHCI=m -CONFIG_USB_ROLE_SWITCH=m -CONFIG_USB_RTL8150=m -CONFIG_USB_RTL8152=m -CONFIG_USB_S2255=m -CONFIG_USB_SERIAL_AIRCABLE=m -CONFIG_USB_SERIAL_ARK3116=m -CONFIG_USB_SERIAL_BELKIN=m -CONFIG_USB_SERIAL_CH341=m -CONFIG_USB_SERIAL_CP210X=m -CONFIG_USB_SERIAL_CYBERJACK=m -CONFIG_USB_SERIAL_CYPRESS_M8=m -CONFIG_USB_SERIAL_DEBUG=m -CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m -CONFIG_USB_SERIAL_EDGEPORT=m -CONFIG_USB_SERIAL_EDGEPORT_TI=m -CONFIG_USB_SERIAL_EMPEG=m -CONFIG_USB_SERIAL_F81232=m -CONFIG_USB_SERIAL_F8153X=m -CONFIG_USB_SERIAL_FTDI_SIO=m -CONFIG_USB_SERIAL_GARMIN=m -CONFIG_USB_SERIAL_GENERIC=y -CONFIG_USB_SERIAL_IPAQ=m -CONFIG_USB_SERIAL_IPW=m -CONFIG_USB_SERIAL_IR=m -CONFIG_USB_SERIAL_IUU=m -CONFIG_USB_SERIAL_KEYSPAN=m -CONFIG_USB_SERIAL_KEYSPAN_PDA=m -CONFIG_USB_SERIAL_KLSI=m -CONFIG_USB_SERIAL_KOBIL_SCT=m -CONFIG_USB_SERIAL=m -CONFIG_USB_SERIAL_MCT_U232=m -CONFIG_USB_SERIAL_METRO=m -CONFIG_USB_SERIAL_MOS7715_PARPORT=y -CONFIG_USB_SERIAL_MOS7720=m -CONFIG_USB_SERIAL_MOS7840=m -CONFIG_USB_SERIAL_MXUPORT=m -CONFIG_USB_SERIAL_NAVMAN=m -CONFIG_USB_SERIAL_OMNINET=m -CONFIG_USB_SERIAL_OPTICON=m -CONFIG_USB_SERIAL_OPTION=m -CONFIG_USB_SERIAL_OTI6858=m -CONFIG_USB_SERIAL_PL2303=m -CONFIG_USB_SERIAL_QCAUX=m -CONFIG_USB_SERIAL_QT2=m -CONFIG_USB_SERIAL_QUALCOMM=m -CONFIG_USB_SERIAL_SAFE=m -CONFIG_USB_SERIAL_SAFE_PADDED=y -CONFIG_USB_SERIAL_SIERRAWIRELESS=m -CONFIG_USB_SERIAL_SIMPLE=m -CONFIG_USB_SERIAL_SPCP8X5=m -CONFIG_USB_SERIAL_SSU100=m -CONFIG_USB_SERIAL_SYMBOL=m 
-CONFIG_USB_SERIAL_TI=m -CONFIG_USB_SERIAL_UPD78F0730=m -CONFIG_USB_SERIAL_VISOR=m -CONFIG_USB_SERIAL_WHITEHEAT=m -CONFIG_USB_SERIAL_WISHBONE=m -CONFIG_USB_SERIAL_WWAN=m -CONFIG_USB_SERIAL_XIRCOM=m -CONFIG_USB_SERIAL_XSENS_MT=m -CONFIG_USB_SIERRA_NET=m -CONFIG_USB_SNP_CORE=m -CONFIG_USB_STKWEBCAM=m -CONFIG_USB_STORAGE_ENE_UB6250=m -CONFIG_USB_STORAGE_REALTEK=m -CONFIG_USB_STV06XX=m -CONFIG_USB_U_ETHER=m -CONFIG_USB_UHCI_HCD=m -CONFIG_USB_ULPI_BUS=m -CONFIG_USB_USBNET=m -# CONFIG_USB_USS720 is not set -CONFIG_USB_VIDEO_CLASS_INPUT_EVDEV=y -CONFIG_USB_VIDEO_CLASS=m -CONFIG_USB_VL600=m -CONFIG_USB_WDM=m -# CONFIG_USB_WUSB_CBAF is not set -CONFIG_USB_XHCI_HCD=m -CONFIG_USB_XHCI_PCI=m -CONFIG_USB_XHCI_PLATFORM=m -CONFIG_USB_ZD1201=m -# CONFIG_USB_ZERO is not set -CONFIG_USB_ZR364XX=m -CONFIG_USERIO=m -CONFIG_USER_NS=y -CONFIG_USER_RETURN_NOTIFIER=y -# CONFIG_UWB is not set -CONFIG_V4L2_FWNODE=m -# CONFIG_V4L_MEM2MEM_DRIVERS is not set -# CONFIG_V4L_PLATFORM_DRIVERS is not set -# CONFIG_V4L_TEST_DRIVERS is not set -# CONFIG_VBOXGUEST is not set -# CONFIG_VCNL4000 is not set -# CONFIG_VCNL4035 is not set -# CONFIG_VEML6070 is not set -CONFIG_VETH=m -CONFIG_VFAT_FS=m -CONFIG_VFIO_IOMMU_TYPE1=m -CONFIG_VFIO=m -CONFIG_VFIO_MDEV_DEVICE=m -CONFIG_VFIO_MDEV=m -CONFIG_VFIO_NOIOMMU=y -CONFIG_VFIO_PCI_IGD=y -CONFIG_VFIO_PCI_INTX=y -CONFIG_VFIO_PCI=m -CONFIG_VFIO_PCI_MMAP=y -CONFIG_VFIO_PCI_VGA=y -CONFIG_VFIO_VIRQFD=m -# CONFIG_VGACON_SOFT_SCROLLBACK is not set -CONFIG_VGA_SWITCHEROO=y -CONFIG_VHOST=m -CONFIG_VHOST_NET=m -CONFIG_VHOST_VSOCK=m -CONFIG_VIA_RHINE=m -CONFIG_VIA_RHINE_MMIO=y -CONFIG_VIA_VELOCITY=m -CONFIG_VIA_WDT=m -CONFIG_VIDEO_AD5820=m -CONFIG_VIDEO_ADP1653=m -CONFIG_VIDEO_ADV7170=m -CONFIG_VIDEO_ADV7175=m -CONFIG_VIDEO_ADV7183=m -CONFIG_VIDEO_ADV7343=m -CONFIG_VIDEO_ADV7393=m -# CONFIG_VIDEO_ADV_DEBUG is not set -CONFIG_VIDEO_AK881X=m -CONFIG_VIDEO_AU0828=m -CONFIG_VIDEO_AU0828_RC=y -CONFIG_VIDEO_AU0828_V4L2=y -CONFIG_VIDEO_BT819=m -CONFIG_VIDEO_BT856=m -CONFIG_VIDEO_BT866=m -CONFIG_VIDEOBUF2_CORE=m -CONFIG_VIDEOBUF2_DMA_CONTIG=m -CONFIG_VIDEOBUF2_DMA_SG=m -CONFIG_VIDEOBUF2_DVB=m -CONFIG_VIDEOBUF2_MEMOPS=m -CONFIG_VIDEOBUF2_V4L2=m -CONFIG_VIDEOBUF2_VMALLOC=m -CONFIG_VIDEOBUF_DMA_SG=m -CONFIG_VIDEOBUF_GEN=m -CONFIG_VIDEOBUF_VMALLOC=m -CONFIG_VIDEO_CPIA2=m -CONFIG_VIDEO_CS3308=m -CONFIG_VIDEO_CS5345=m -CONFIG_VIDEO_CS53L32A=m -CONFIG_VIDEO_CX18_ALSA=m -CONFIG_VIDEO_CX18=m -CONFIG_VIDEO_CX231XX_ALSA=m -CONFIG_VIDEO_CX231XX_DVB=m -CONFIG_VIDEO_CX231XX=m -CONFIG_VIDEO_CX231XX_RC=y -CONFIG_VIDEO_CX2341X=m -CONFIG_VIDEO_CX23885=m -CONFIG_VIDEO_CX25821_ALSA=m -CONFIG_VIDEO_CX25821=m -CONFIG_VIDEO_CX25840=m -CONFIG_VIDEO_CX88_ALSA=m -CONFIG_VIDEO_CX88_BLACKBIRD=m -CONFIG_VIDEO_CX88_DVB=m -CONFIG_VIDEO_CX88_ENABLE_VP3054=y -CONFIG_VIDEO_CX88=m -CONFIG_VIDEO_CX88_MPEG=m -CONFIG_VIDEO_CX88_VP3054=m -CONFIG_VIDEO_DEV=m -CONFIG_VIDEO_DT3155=m -CONFIG_VIDEO_EM28XX_ALSA=m -CONFIG_VIDEO_EM28XX_DVB=m -CONFIG_VIDEO_EM28XX=m -CONFIG_VIDEO_EM28XX_RC=m -CONFIG_VIDEO_EM28XX_V4L2=m -# CONFIG_VIDEO_FB_IVTV is not set -# CONFIG_VIDEO_FIXED_MINOR_RANGES is not set -CONFIG_VIDEO_GO7007_LOADER=m -CONFIG_VIDEO_GO7007=m -CONFIG_VIDEO_GO7007_USB=m -CONFIG_VIDEO_GO7007_USB_S2250_BOARD=m -CONFIG_VIDEO_HDPVR=m -CONFIG_VIDEO_HEXIUM_GEMINI=m -CONFIG_VIDEO_HEXIUM_ORION=m -CONFIG_VIDEO_I2C=m -CONFIG_VIDEO_IR_I2C=m -CONFIG_VIDEO_IVTV_ALSA=m -# CONFIG_VIDEO_IVTV_DEPRECATED_IOCTLS is not set -CONFIG_VIDEO_IVTV=m -CONFIG_VIDEO_KS0127=m -CONFIG_VIDEO_LM3560=m -CONFIG_VIDEO_LM3646=m -CONFIG_VIDEO_M52790=m -CONFIG_VIDEO_MEYE=m 
-CONFIG_VIDEO_ML86V7667=m -CONFIG_VIDEO_MSP3400=m -CONFIG_VIDEO_MT9M111=m -CONFIG_VIDEO_MT9T112=m -CONFIG_VIDEO_MT9V011=m -CONFIG_VIDEO_MT9V111=m -CONFIG_VIDEO_MXB=m -CONFIG_VIDEO_OV2640=m -CONFIG_VIDEO_OV2659=m -CONFIG_VIDEO_OV2680=m -CONFIG_VIDEO_OV2685=m -CONFIG_VIDEO_OV5695=m -CONFIG_VIDEO_OV6650=m -CONFIG_VIDEO_OV7640=m -CONFIG_VIDEO_OV7670=m -CONFIG_VIDEO_OV772X=m -CONFIG_VIDEO_OV7740=m -CONFIG_VIDEO_OV9640=m -# CONFIG_VIDEO_PVRUSB2_DEBUGIFC is not set -CONFIG_VIDEO_PVRUSB2_DVB=y -CONFIG_VIDEO_PVRUSB2=m -CONFIG_VIDEO_PVRUSB2_SYSFS=y -CONFIG_VIDEO_RJ54N1=m -CONFIG_VIDEO_SAA6588=m -CONFIG_VIDEO_SAA6752HS=m -CONFIG_VIDEO_SAA7110=m -CONFIG_VIDEO_SAA711X=m -CONFIG_VIDEO_SAA7127=m -CONFIG_VIDEO_SAA7134_ALSA=m -CONFIG_VIDEO_SAA7134_DVB=m -CONFIG_VIDEO_SAA7134_GO7007=m -CONFIG_VIDEO_SAA7134=m -CONFIG_VIDEO_SAA7134_RC=y -CONFIG_VIDEO_SAA7146=m -CONFIG_VIDEO_SAA7146_VV=m -CONFIG_VIDEO_SAA7164=m -CONFIG_VIDEO_SAA717X=m -CONFIG_VIDEO_SAA7185=m -CONFIG_VIDEO_SOLO6X10=m -CONFIG_VIDEO_SONY_BTF_MPX=m -CONFIG_VIDEO_SR030PC30=m -CONFIG_VIDEO_STK1160_COMMON=m -CONFIG_VIDEO_STK1160=m -CONFIG_VIDEO_TDA7432=m -CONFIG_VIDEO_TDA9840=m -CONFIG_VIDEO_TEA6415C=m -CONFIG_VIDEO_TEA6420=m -CONFIG_VIDEO_THS7303=m -CONFIG_VIDEO_THS8200=m -CONFIG_VIDEO_TLV320AIC23B=m -CONFIG_VIDEO_TM6000_ALSA=m -CONFIG_VIDEO_TM6000_DVB=m -CONFIG_VIDEO_TM6000=m -CONFIG_VIDEO_TUNER=m -CONFIG_VIDEO_TVAUDIO=m -CONFIG_VIDEO_TVEEPROM=m -CONFIG_VIDEO_TVP514X=m -CONFIG_VIDEO_TVP5150=m -CONFIG_VIDEO_TVP7002=m -CONFIG_VIDEO_TW2804=m -CONFIG_VIDEO_TW5864=m -CONFIG_VIDEO_TW686X=m -CONFIG_VIDEO_TW68=m -CONFIG_VIDEO_TW9903=m -CONFIG_VIDEO_TW9906=m -CONFIG_VIDEO_TW9910=m -CONFIG_VIDEO_UDA1342=m -CONFIG_VIDEO_UPD64031A=m -CONFIG_VIDEO_UPD64083=m -CONFIG_VIDEO_USBTV=m -CONFIG_VIDEO_USBVISION=m -CONFIG_VIDEO_V4L2_I2C=y -CONFIG_VIDEO_V4L2=m -# CONFIG_VIDEO_V4L2_SUBDEV_API is not set -CONFIG_VIDEO_VP27SMPX=m -CONFIG_VIDEO_VPX3220=m -CONFIG_VIDEO_VS6624=m -CONFIG_VIDEO_WM8739=m -CONFIG_VIDEO_WM8775=m -# CONFIG_VIPERBOARD_ADC is not set -CONFIG_VIRT_DRIVERS=y -CONFIG_VIRTIO_BALLOON=m -CONFIG_VIRTIO_BLK=m -# CONFIG_VIRTIO_BLK_SCSI is not set -CONFIG_VIRTIO_CONSOLE=m -# CONFIG_VIRTIO_FS is not set -CONFIG_VIRTIO_INPUT=m -CONFIG_VIRTIO=m -# CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES is not set -CONFIG_VIRTIO_MMIO=m -CONFIG_VIRTIO_NET=m -CONFIG_VIRTIO_PCI_LEGACY=y -CONFIG_VIRTIO_PCI=m -CONFIG_VIRTIO_PMEM=m -CONFIG_VIRTIO_VSOCKETS_COMMON=m -CONFIG_VIRTIO_VSOCKETS=m -CONFIG_VIRT_WIFI=m -CONFIG_VITESSE_PHY=m -# CONFIG_VL53L0X_I2C is not set -# CONFIG_VL6180 is not set -# CONFIG_VLAN_8021Q_GVRP is not set -CONFIG_VLAN_8021Q=m -# CONFIG_VLAN_8021Q_MVRP is not set -CONFIG_VMD=y -CONFIG_VMWARE_PVSCSI=m -CONFIG_VMXNET3=m -CONFIG_VORTEX=m -CONFIG_VSOCKETS_DIAG=m -CONFIG_VSOCKETS=m -CONFIG_VSOCKMON=m -# CONFIG_VT6655 is not set -# CONFIG_VT6656 is not set -# CONFIG_VXGE_DEBUG_TRACE_ALL is not set -CONFIG_VXGE=m -CONFIG_VXLAN=m -# CONFIG_VZ89X is not set -CONFIG_W83627HF_WDT=m -CONFIG_W83877F_WDT=m -CONFIG_W83977F_WDT=m -CONFIG_WAFER_WDT=m -CONFIG_WANT_DEV_COREDUMP=y -CONFIG_WATCHDOG_CORE=y -# CONFIG_WATCHDOG_PRETIMEOUT_GOV is not set -# CONFIG_WCN36XX_DEBUGFS is not set -CONFIG_WCN36XX=m -CONFIG_WDTPCI=m -CONFIG_WEXT_CORE=y -CONFIG_WEXT_PRIV=y -CONFIG_WEXT_PROC=y -CONFIG_WEXT_SPY=y -CONFIG_WIL6210_DEBUGFS=y -CONFIG_WIL6210_ISR_COR=y -CONFIG_WIL6210=m -# CONFIG_WILC1000_HW_OOB_INTR is not set -CONFIG_WILC1000=m -CONFIG_WILC1000_SDIO=m -CONFIG_WILINK_PLATFORM_DATA=y -CONFIG_WIMAX_DEBUG_LEVEL=8 -CONFIG_WIMAX_I2400M_DEBUG_LEVEL=8 -CONFIG_WIMAX_I2400M=m -CONFIG_WIMAX_I2400M_USB=m 
-CONFIG_WIMAX=m -CONFIG_WINBOND_840=m -CONFIG_WIRELESS_EXT=y -CONFIG_WIRELESS_WDS=y -CONFIG_WIZNET_BUS_ANY=y -# CONFIG_WIZNET_BUS_DIRECT is not set -# CONFIG_WIZNET_BUS_INDIRECT is not set -CONFIG_WIZNET_W5100=m -CONFIG_WIZNET_W5300=m -CONFIG_WL1251=m -CONFIG_WL1251_SDIO=m -CONFIG_WL12XX=m -CONFIG_WL18XX=m -CONFIG_WLCORE=m -CONFIG_WLCORE_SDIO=m -CONFIG_WMI_BMOF=m -CONFIG_X86_ACPI_CPUFREQ=m -CONFIG_X86_AMD_FREQ_SENSITIVITY=m -# CONFIG_X86_CHECK_BIOS_CORRUPTION is not set -CONFIG_X86_CPUID=m -# CONFIG_X86_DEBUG_FPU is not set -CONFIG_X86_DEV_DMA_OPS=y -# CONFIG_X86_EXTENDED_PLATFORM is not set -CONFIG_X86_HV_CALLBACK_VECTOR=y -CONFIG_X86_INTEL_LPSS=y -CONFIG_X86_MSR=m -CONFIG_X86_P4_CLOCKMOD=m -CONFIG_X86_PCC_CPUFREQ=m -CONFIG_X86_PMEM_LEGACY_DEVICE=y -CONFIG_X86_PMEM_LEGACY=y -CONFIG_X86_POWERNOW_K8=m -CONFIG_X86_PTDUMP_CORE=y -# CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS is not set -CONFIG_X86_SPEEDSTEP_CENTRINO=m -CONFIG_X86_SPEEDSTEP_LIB=m -CONFIG_X86_X2APIC=y -CONFIG_X86_X32=y -# CONFIG_XDP_SOCKETS is not set -# CONFIG_XEN is not set -CONFIG_XFRM_ALGO=m -CONFIG_XFRM_INTERFACE=m -CONFIG_XFRM_IPCOMP=m -CONFIG_XFRM_MIGRATE=y -CONFIG_XFRM_OFFLOAD=y -CONFIG_XFRM_USER=m -# CONFIG_XFS_DEBUG is not set -CONFIG_XFS_FS=m -CONFIG_XFS_ONLINE_REPAIR=y -CONFIG_XFS_ONLINE_SCRUB=y -CONFIG_XFS_POSIX_ACL=y -CONFIG_XFS_QUOTA=y -CONFIG_XFS_RT=y -# CONFIG_XFS_WARN is not set -CONFIG_XIAOMI_WMI=m -CONFIG_XILINX_AXI_EMAC=m -CONFIG_XILINX_GMII2RGMII=m -CONFIG_XILINX_LL_TEMAC=m -CONFIG_XILINX_WATCHDOG=m -# CONFIG_XILINX_XADC is not set -CONFIG_XOR_BLOCKS=m -CONFIG_XXHASH=y -CONFIG_YELLOWFIN=m -CONFIG_YENTA=m -CONFIG_Z3FOLD=y -CONFIG_ZBUD=y -# CONFIG_ZD1211RW_DEBUG is not set -CONFIG_ZD1211RW=m -CONFIG_ZEROPLUS_FF=y -CONFIG_ZIIRAVE_WATCHDOG=m -CONFIG_ZOPT2201=m -# CONFIG_ZPA2326 is not set -CONFIG_ZPOOL=y -CONFIG_ZRAM=m -# CONFIG_ZRAM_MEMORY_TRACKING is not set -# CONFIG_ZRAM_WRITEBACK is not set -# CONFIG_ZSMALLOC_STAT is not set -CONFIG_ZSMALLOC=y -CONFIG_ZSTD_COMPRESS=m -CONFIG_ZSTD_DECOMPRESS=m -CONFIG_ZSWAP=y -!CONFIG_ACPI_EXTLOG= -!CONFIG_ACPI_I2C_OPREGION= -!CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION= -!CONFIG_ARCH_USE_MEMREMAP_PROT= -!CONFIG_AX25= -!CONFIG_BINARY_PRINTF= -!CONFIG_BLK_DEV_IO_TRACE= -!CONFIG_BRANCH_PROFILE_NONE= -!CONFIG_BSD_PROCESS_ACCT_V3= -!CONFIG_CGROUP_HUGETLB= -!CONFIG_CONTEXT_SWITCH_TRACER= -!CONFIG_DEBUG_PAGE_REF= -!CONFIG_DEFAULT_SECURITY_SELINUX= -!CONFIG_DEFXX= -!CONFIG_DM_INIT= -!CONFIG_DRM_DEBUG_MM= -!CONFIG_DYNAMIC_EVENTS= -!CONFIG_DYNAMIC_MEMORY_LAYOUT= -!CONFIG_EARLY_PRINTK_USB= -!CONFIG_EDAC_AMD64= -!CONFIG_EDAC_DEBUG= -!CONFIG_EDAC_DECODE_MCE= -!CONFIG_EDAC_E752X= -!CONFIG_EDAC_I10NM= -!CONFIG_EDAC_I3000= -!CONFIG_EDAC_I3200= -!CONFIG_EDAC_I5000= -!CONFIG_EDAC_I5100= -!CONFIG_EDAC_I5400= -!CONFIG_EDAC_I7300= -!CONFIG_EDAC_I7CORE= -!CONFIG_EDAC_I82975X= -!CONFIG_EDAC_IE31200= -!CONFIG_EDAC_LEGACY_SYSFS= -!CONFIG_EDAC_PND2= -!CONFIG_EDAC_SBRIDGE= -!CONFIG_EDAC_SKX= -!CONFIG_EDAC_X38= -!CONFIG_EFI_EARLYCON= -!CONFIG_EVENT_TRACING= -!CONFIG_EVM= -!CONFIG_EXT4_USE_FOR_EXT2= -!CONFIG_FTRACE_STARTUP_TEST= -!CONFIG_FTRACE_SYSCALLS= -!CONFIG_FUNCTION_ERROR_INJECTION= -!CONFIG_FUNCTION_TRACER= -!CONFIG_GENERIC_TRACER= -!CONFIG_HIST_TRIGGERS= -!CONFIG_HPET_MMAP= -!CONFIG_HUGETLB_PAGE= -!CONFIG_HWLAT_TRACER= -!CONFIG_IMA= -!CONFIG_INLINE_READ_UNLOCK= -!CONFIG_INLINE_READ_UNLOCK_IRQ= -!CONFIG_INLINE_SPIN_UNLOCK_IRQ= -!CONFIG_INLINE_WRITE_UNLOCK= -!CONFIG_INLINE_WRITE_UNLOCK_IRQ= -!CONFIG_INTEGRITY= -!CONFIG_INTEGRITY_AUDIT= -!CONFIG_INTEGRITY_SIGNATURE= -!CONFIG_IO_STRICT_DEVMEM= 
-!CONFIG_IP_PNP_BOOTP= -!CONFIG_IP_PNP_DHCP= -!CONFIG_IP_PNP_RARP= -!CONFIG_IRQSOFF_TRACER= -!CONFIG_KPROBE_EVENTS= -!CONFIG_KPROBES_SANITY_TEST= -!CONFIG_KRETPROBES= -!CONFIG_LOGO_LINUX_CLUT224= -!CONFIG_LOGO_LINUX_MONO= -!CONFIG_LOGO_LINUX_VGA16= -!CONFIG_LSM_MMAP_MIN_ADDR= -!CONFIG_MD_AUTODETECT= -!CONFIG_MFD_88PM860X= -!CONFIG_MFD_AS3711= -!CONFIG_MFD_DA9052_I2C= -!CONFIG_MFD_DA9055= -!CONFIG_MFD_LP8788= -!CONFIG_MFD_MAX77843= -!CONFIG_MFD_MAX8925= -!CONFIG_MFD_MAX8997= -!CONFIG_MFD_MAX8998= -!CONFIG_MFD_PALMAS= -!CONFIG_MFD_RC5T583= -!CONFIG_MFD_SEC_CORE= -!CONFIG_MFD_SMSC= -!CONFIG_MFD_TPS65090= -!CONFIG_MFD_TPS6586X= -!CONFIG_MFD_TPS80031= -!CONFIG_MFD_WM831X_I2C= -!CONFIG_MFD_WM8350_I2C= -!CONFIG_MFD_WM8400= -!CONFIG_MMIOTRACE= -!CONFIG_MOUSE_PS2_SMBUS= -!CONFIG_MOUSE_PS2_SYNAPTICS_SMBUS= -!CONFIG_NET_DROP_MONITOR= -!CONFIG_NETFILTER_XT_TARGET_CONNSECMARK= -!CONFIG_NETFILTER_XT_TARGET_SECMARK= -!CONFIG_NETLABEL= -!CONFIG_NETPOLL= -!CONFIG_NET_POLL_CONTROLLER= -!CONFIG_NF_CONNTRACK_SECMARK= -!CONFIG_NOP_TRACER= -!CONFIG_OPROFILE= -!CONFIG_OPTPROBES= -!CONFIG_PGTABLE_MAPPING= -!CONFIG_PMIC_ADP5520= -!CONFIG_PMIC_DA903X= -!CONFIG_PM_TRACE= -!CONFIG_POWER_SUPPLY_HWMON= -!CONFIG_PREEMPTIRQ_DELAY_TEST= -!CONFIG_PREEMPTIRQ_EVENTS= -!CONFIG_PROBE_EVENTS= -!CONFIG_PROC_VMCORE= -!CONFIG_PROC_VMCORE_DEVICE_DUMP= -!CONFIG_PROFILE_ALL_BRANCHES= -!CONFIG_PROFILE_ANNOTATED_BRANCHES= -!CONFIG_RANDOMIZE_MEMORY= -!CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING= -!CONFIG_RING_BUFFER= -!CONFIG_RING_BUFFER_BENCHMARK= -!CONFIG_RING_BUFFER_STARTUP_TEST= -!CONFIG_ROOT_NFS= -!CONFIG_SCHED_TRACER= -!CONFIG_SECURITY_APPARMOR= -!CONFIG_SECURITY_LOADPIN= -!CONFIG_SECURITY_LOCKDOWN_LSM= -!CONFIG_SECURITY_NETWORK= -!CONFIG_SECURITY_NETWORK_XFRM= -!CONFIG_SECURITY_PATH= -!CONFIG_SECURITY_SAFESETID= -!CONFIG_SECURITY_SELINUX= -!CONFIG_SECURITY_SELINUX_AVC_STATS= -!CONFIG_SECURITY_SELINUX_BOOTPARAM= -!CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE= -!CONFIG_SECURITY_SELINUX_DEVELOP= -!CONFIG_SECURITY_SELINUX_DISABLE= -!CONFIG_SECURITY_SMACK= -!CONFIG_SECURITY_TOMOYO= -!CONFIG_SECURITY_WRITABLE_HOOKS= -!CONFIG_SECURITY_YAMA= -!CONFIG_SERIAL_8250_DETECT_IRQ= -!CONFIG_SERIAL_8250_MANY_PORTS= -!CONFIG_SERIAL_8250_RSA= -!CONFIG_SERIAL_8250_SHARE_IRQ= -!CONFIG_SERIAL_CORE_CONSOLE= -!CONFIG_SERIAL_EARLYCON= -!CONFIG_SKFP= -!CONFIG_SND_SE6X= -!CONFIG_SND_SEQ_HRTIMER_DEFAULT= -!CONFIG_STACK_TRACER= -!CONFIG_STATIC_KEYS_SELFTEST= -!CONFIG_THERMAL_HWMON= -!CONFIG_TRACE_CLOCK= -!CONFIG_TRACE_EVAL_MAP_FILE= -!CONFIG_TRACEPOINT_BENCHMARK= -!CONFIG_TRACEPOINTS= -!CONFIG_TRACER_SNAPSHOT= -!CONFIG_TRACING= -!CONFIG_TREE_RCU= -!CONFIG_TWL4030_CORE= -!CONFIG_TWL6040_CORE= -!CONFIG_UPROBE_EVENTS= -!CONFIG_UPROBES= -!CONFIG_USB_BDC_PCI= -!CONFIG_VGACON_SOFT_SCROLLBACK= -!CONFIG_VGACON_SOFT_SCROLLBACK_PERSISTENT_ENABLE_BY_DEFAULT= -!CONFIG_VGACON_SOFT_SCROLLBACK_SIZE= -!CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK= -!CONFIG_X86_GOLDFISH= -!CONFIG_X86_INTEL_MID= -!CONFIG_X86_NEED_RELOCS= -!CONFIG_X86_VSMP= diff --git a/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/11-update-01 b/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/11-update-01 deleted file mode 100644 index 569c97ff4..000000000 --- a/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/11-update-01 +++ /dev/null @@ -1,133 +0,0 @@ -# Calculate format=kernel name=.config os_install_arch_machine==x86_64 -# CONFIG_ADT7316 is not set -# CONFIG_AK09911 is not set -# CONFIG_AK8975 is not set -# 
CONFIG_BACKLIGHT_ADP8870 is not set -# CONFIG_BACKLIGHT_APPLE is not set -# CONFIG_BACKLIGHT_ARCXCNN is not set -# CONFIG_BACKLIGHT_BD6107 is not set -# CONFIG_BACKLIGHT_GENERIC is not set -# CONFIG_BACKLIGHT_GPIO is not set -# CONFIG_BACKLIGHT_LM3639 is not set -# CONFIG_BACKLIGHT_LV5207LP is not set -# CONFIG_BACKLIGHT_PM8941_WLED is not set -# CONFIG_BCMA_DRIVER_GPIO is not set -# CONFIG_CHARGER_BQ24190 is not set -# CONFIG_CHARGER_BQ24257 is not set -# CONFIG_CHARGER_BQ24735 is not set -# CONFIG_CHARGER_BQ25890 is not set -# CONFIG_CHARGER_GPIO is not set -# CONFIG_CHARGER_LT3651 is not set -# CONFIG_CHARGER_RT9455 is not set -# CONFIG_DEBUG_GPIO is not set -CONFIG_DELL_RBU=m -CONFIG_DELL_SMBIOS_SMM=y -CONFIG_DELL_SMBIOS_WMI=y -# CONFIG_DHT11 is not set -# CONFIG_EXTCON_GPIO is not set -# CONFIG_EXTCON_INTEL_INT3496 is not set -# CONFIG_EXTCON_MAX3355 is not set -# CONFIG_EXTCON_PTN5150 is not set -# CONFIG_EXTCON_USB_GPIO is not set -# CONFIG_FB_VIA is not set -CONFIG_GPIO_ACPI=y -# CONFIG_GPIO_ADP5588 is not set -# CONFIG_GPIO_AMD8111 is not set -CONFIG_GPIO_AMD_FCH=m -# CONFIG_GPIO_AMDPT is not set -# CONFIG_GPIO_BT8XX is not set -# CONFIG_GPIO_DLN2 is not set -# CONFIG_GPIO_DWAPB is not set -# CONFIG_GPIO_EXAR is not set -# CONFIG_GPIO_F7188X is not set -# CONFIG_GPIO_GENERIC_PLATFORM is not set -# CONFIG_GPIO_ICH is not set -# CONFIG_GPIO_IT87 is not set -CONFIG_GPIOLIB_FASTPATH_LIMIT=512 -CONFIG_GPIOLIB_IRQCHIP=y -CONFIG_GPIOLIB=y -# CONFIG_GPIO_LP873X is not set -# CONFIG_GPIO_LYNXPOINT is not set -# CONFIG_GPIO_MAX7300 is not set -# CONFIG_GPIO_MAX732X is not set -# CONFIG_GPIO_MB86S7X is not set -# CONFIG_GPIO_ML_IOH is not set -# CONFIG_GPIO_MOCKUP is not set -# CONFIG_GPIO_PCA953X is not set -# CONFIG_GPIO_PCF857X is not set -# CONFIG_GPIO_PCIE_IDIO_24 is not set -# CONFIG_GPIO_PCI_IDIO_16 is not set -# CONFIG_GPIO_RDC321X is not set -# CONFIG_GPIO_SCH311X is not set -# CONFIG_GPIO_SCH is not set -# CONFIG_GPIO_SYSFS is not set -# CONFIG_GPIO_TPIC2810 is not set -# CONFIG_GPIO_TPS65086 is not set -# CONFIG_GPIO_TPS65912 is not set -# CONFIG_GPIO_TQMX86 is not set -# CONFIG_GPIO_VIPERBOARD is not set -# CONFIG_GPIO_VX855 is not set -# CONFIG_GPIO_WHISKEY_COVE is not set -# CONFIG_GPIO_WINBOND is not set -# CONFIG_GPIO_WS16C48 is not set -# CONFIG_GPIO_XILINX is not set -# CONFIG_HID_CP2112 is not set -CONFIG_HID_SENSOR_TEMP=m -CONFIG_HUAWEI_WMI=m -# CONFIG_HX711 is not set -# CONFIG_I2C_CBUS_GPIO is not set -CONFIG_I2C_DESIGNWARE_SLAVE=y -# CONFIG_I2C_GPIO is not set -CONFIG_I2C_MULTI_INSTANTIATE=m -CONFIG_I2C_MUX_GPIO=m -CONFIG_I2C_MUX_LTC4306=m -CONFIG_I2C_MUX_PCA954x=m -# CONFIG_INPUT_DRV260X_HAPTICS is not set -# CONFIG_INPUT_GP2A is not set -# CONFIG_INPUT_GPIO_BEEPER is not set -# CONFIG_INPUT_GPIO_DECODER is not set -# CONFIG_INPUT_GPIO_ROTARY_ENCODER is not set -# CONFIG_INPUT_GPIO_VIBRA is not set -CONFIG_INTEL_ATOMISP2_PM=m -CONFIG_INTEL_INT0002_VGPIO=m -# CONFIG_INTEL_SOC_PMIC_CHTDC_TI is not set -# CONFIG_KEYBOARD_GPIO is not set -CONFIG_KEYBOARD_GPIO_POLLED=m -# CONFIG_KEYBOARD_MATRIX is not set -CONFIG_LEDS_GPIO=m -# CONFIG_LEDS_LP3952 is not set -# CONFIG_LEDS_PCA9532_GPIO is not set -# CONFIG_LEDS_PCA955X_GPIO is not set -# CONFIG_LEDS_TRIGGER_GPIO is not set -# CONFIG_MANAGER_SBS is not set -# CONFIG_MDIO_GPIO is not set -# CONFIG_MEN_A21_WDT is not set -# CONFIG_MMA7660 is not set -# CONFIG_MMC35240 is not set -# CONFIG_MOUSE_GPIO is not set -CONFIG_PCENGINES_APU2=m -CONFIG_PEAQ_WMI=m -# CONFIG_REGULATOR_GPIO is not set -# CONFIG_REGULATOR_TPS65132 is not 
set -# CONFIG_RFKILL_GPIO is not set -CONFIG_SENSORS_IBMAEM=m -CONFIG_SENSORS_IBMPEX=m -CONFIG_SENSORS_IIO_HWMON=m -CONFIG_SENSORS_JC42=m -CONFIG_SENSORS_SHT15=m -CONFIG_SERIAL_MCTRL_GPIO=y -# CONFIG_SERIO_GPIO_PS2 is not set -# CONFIG_SRF04 is not set -# CONFIG_SSB_DRIVER_GPIO is not set -# CONFIG_TI_ST is not set -CONFIG_TOUCHSCREEN_AUO_PIXCIR=m -CONFIG_TOUCHSCREEN_CY8CTMG110=m -CONFIG_TOUCHSCREEN_GOODIX=m -CONFIG_TOUCHSCREEN_HIDEEP=m -CONFIG_TOUCHSCREEN_RM_TS=m -CONFIG_TOUCHSCREEN_SIS_I2C=m -CONFIG_TOUCHSCREEN_ZFORCE=m -# CONFIG_TPS65010 is not set -# CONFIG_UCB1400_CORE is not set -# CONFIG_USB_CONN_GPIO is not set -# CONFIG_USB_GPIO_VBUS is not set diff --git a/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/11-update-02 b/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/11-update-02 deleted file mode 100644 index baadb5131..000000000 --- a/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/11-update-02 +++ /dev/null @@ -1,3 +0,0 @@ -# Calculate format=kernel name=.config -CONFIG_DRM_VMWGFX_FBCON=y -CONFIG_DRM_VMWGFX=m diff --git a/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/11-update-03 b/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/11-update-03 deleted file mode 100644 index 27f53a7c9..000000000 --- a/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/11-update-03 +++ /dev/null @@ -1,13 +0,0 @@ -# Calculate format=kernel name=.config -CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION=y -CONFIG_CGROUP_HUGETLB=y -CONFIG_CGROUP_NET_PRIO=y -CONFIG_HUGETLBFS=y -CONFIG_HUGETLB_PAGE=y -CONFIG_IP_VS=m -CONFIG_IP_VS_NFCT=y -CONFIG_IP_VS_PROTO_TCP=y -CONFIG_IP_VS_PROTO_UDP=y -CONFIG_IP_VS_RR=m -CONFIG_NETFILTER_XT_MATCH_IPVS=m -CONFIG_NETFILTER_XT_TARGET_AUDIT=m diff --git a/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/11-update-04 b/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/11-update-04 deleted file mode 100644 index 05bd6d7ba..000000000 --- a/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/11-update-04 +++ /dev/null @@ -1,31 +0,0 @@ -# Calculate format=kernel name=.config -CONFIG_EXFAT_DEFAULT_CODEPAGE=437 -CONFIG_EXFAT_DEFAULT_IOCHARSET="utf8" -CONFIG_EXFAT_DELAYED_SYNC=y -CONFIG_EXFAT_DISCARD=y -# CONFIG_EXFAT_DONT_MOUNT_VFAT is not set -CONFIG_EXFAT_FS=m -# CONFIG_EXFAT_DEBUG_MSG is not set -# CONFIG_EXFAT_KERNEL_DEBUG is not set -# CONFIG_IP_VS_DEBUG is not set -# CONFIG_IP_VS_DH is not set -# CONFIG_IP_VS_FO is not set -# CONFIG_IP_VS_FTP is not set -# CONFIG_IP_VS_IPV6 is not set -# CONFIG_IP_VS_LBLC is not set -# CONFIG_IP_VS_LBLCR is not set -# CONFIG_IP_VS_LC is not set -# CONFIG_IP_VS_MH is not set -CONFIG_IP_VS_MH_TAB_INDEX=12 -# CONFIG_IP_VS_NQ is not set -# CONFIG_IP_VS_OVF is not set -# CONFIG_IP_VS_PE_SIP is not set -# CONFIG_IP_VS_PROTO_AH is not set -# CONFIG_IP_VS_PROTO_ESP is not set -# CONFIG_IP_VS_PROTO_SCTP is not set -# CONFIG_IP_VS_SED is not set -# CONFIG_IP_VS_SH is not set -CONFIG_IP_VS_SH_TAB_BITS=8 -CONFIG_IP_VS_TAB_BITS=12 -# CONFIG_IP_VS_WLC is not set -# CONFIG_IP_VS_WRR is not set diff --git a/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/11-update-05 b/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/11-update-05 deleted file mode 100644 index 4ae22bf04..000000000 --- a/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/11-update-05 +++ 
/dev/null @@ -1,2 +0,0 @@ -# Calculate format=kernel name=.config -CONFIG_USB_PRINTER=m diff --git a/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/11-update-06 b/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/11-update-06 deleted file mode 100644 index 8ac43bf3d..000000000 --- a/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/11-update-06 +++ /dev/null @@ -1,7 +0,0 @@ -# Calculate format=kernel name=.config -CONFIG_CRYPTO_LZ4HC=m -CONFIG_CRYPTO_LZ4=m -CONFIG_LZ4_COMPRESS=m -CONFIG_LZ4HC_COMPRESS=m -CONFIG_NET_REDIRECT=y -CONFIG_ZRAM_WRITEBACK=y diff --git a/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/11-update-07 b/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/11-update-07 deleted file mode 100644 index aa58e79a1..000000000 --- a/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/11-update-07 +++ /dev/null @@ -1,4 +0,0 @@ -# Calculate format=kernel name=.config -CONFIG_SQUASHFS_4K_DEVBLK_SIZE=y -# CONFIG_SQUASHFS_DECOMP_MULTI is not set -CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU=y diff --git a/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/11-update-08 b/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/11-update-08 deleted file mode 100644 index 874200b24..000000000 --- a/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/11-update-08 +++ /dev/null @@ -1,10 +0,0 @@ -# Calculate format=kernel name=.config -CONFIG_CRASH_DUMP=y -# CONFIG_PROC_VMCORE_DEVICE_DUMP is not set -CONFIG_PROC_VMCORE=y -# CONFIG_UFS_DEBUG is not set -CONFIG_UFS_FS=m -# CONFIG_UFS_FS_WRITE is not set -# CONFIG_VMWARE_BALLOON is not set -CONFIG_VMWARE_VMCI=m -CONFIG_VMWARE_VMCI_VSOCKETS=m diff --git a/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/11-update-09 b/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/11-update-09 deleted file mode 100644 index bdcb9b2f2..000000000 --- a/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/11-update-09 +++ /dev/null @@ -1,24 +0,0 @@ -# Calculate format=kernel name=.config -CONFIG_ACPI_ADXL=y -# CONFIG_ACPI_EXTLOG is not set -CONFIG_EDAC_AMD64_ERROR_INJECTION=y -CONFIG_EDAC_AMD64=m -# CONFIG_EDAC_DEBUG is not set -CONFIG_EDAC_DECODE_MCE=m -CONFIG_EDAC_E752X=m -CONFIG_EDAC_I10NM=m -CONFIG_EDAC_I3000=m -CONFIG_EDAC_I3200=m -CONFIG_EDAC_I5000=m -CONFIG_EDAC_I5100=m -CONFIG_EDAC_I5400=m -CONFIG_EDAC_I7300=m -CONFIG_EDAC_I7CORE=m -CONFIG_EDAC_I82975X=m -CONFIG_EDAC_IE31200=m -CONFIG_EDAC_LEGACY_SYSFS=y -CONFIG_EDAC=m -CONFIG_EDAC_PND2=m -CONFIG_EDAC_SBRIDGE=m -CONFIG_EDAC_SKX=m -CONFIG_EDAC_X38=m diff --git a/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/11-update-10 b/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/11-update-10 deleted file mode 100644 index f2d26e83f..000000000 --- a/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/11-update-10 +++ /dev/null @@ -1,14 +0,0 @@ -# Calculate format=kernel name=.config -CONFIG_HW_RANDOM_TPM=y -# CONFIG_TCG_ATMEL is not set -CONFIG_TCG_CRB=m -# CONFIG_TCG_INFINEON is not set -# CONFIG_TCG_NSC is not set -# CONFIG_TCG_TIS_I2C_ATMEL is not set -# CONFIG_TCG_TIS_I2C_INFINEON is not set -# CONFIG_TCG_TIS_I2C_NUVOTON is not set -# CONFIG_TCG_TIS is not set -# CONFIG_TCG_TIS_ST33ZP24_I2C is not set -CONFIG_TCG_TPM=m -# CONFIG_TCG_VTPM_PROXY is not set 
-# CONFIG_TRUSTED_KEYS is not set diff --git a/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/30-server b/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/30-server deleted file mode 100644 index 39dd3ea24..000000000 --- a/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/30-server +++ /dev/null @@ -1,14 +0,0 @@ -# Calculate format=kernel name=.config merge(sys-kernel/calculate-sources[-desktop])!= -# CONFIG_BT is not set -# CONFIG_GAMEPORT is not set -# CONFIG_HZ_1000 is not set -# CONFIG_INPUT_JOYSTICK is not set -# CONFIG_INPUT_MOUSE is not set -# CONFIG_INPUT_TABLET is not set -# CONFIG_INPUT_TOUCHSCREEN is not set -# CONFIG_MACINTOSH_DRIVERS is not set -# CONFIG_MEDIA_SUPPORT is not set -# CONFIG_PREEMPT is not set -# CONFIG_RC_CORE is not set -# CONFIG_SOUND is not set -# CONFIG_USB_GADGET is not set diff --git a/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/40-zstd b/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/40-zstd deleted file mode 100644 index c19983680..000000000 --- a/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/40-zstd +++ /dev/null @@ -1,6 +0,0 @@ -# Calculate format=kernel name=.config -# CONFIG_KERNEL_XZ is not set -CONFIG_KERNEL_ZSTD=y -# CONFIG_MODULE_COMPRESS_XZ is not set -CONFIG_MODULE_COMPRESS_ZSTD=y -CONFIG_ZSTD_DECOMPRESS=y diff --git a/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/4500_uksm.patch b/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/4500_uksm.patch deleted file mode 100644 index b8a2f042b..000000000 --- a/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/4500_uksm.patch +++ /dev/null @@ -1,6955 +0,0 @@ -# Calculate format=diff merge(sys-kernel/calculate-sources[uksm])!= -diff --git a/Documentation/vm/uksm.txt b/Documentation/vm/uksm.txt -new file mode 100644 -index 000000000000..be19a3127001 ---- /dev/null -+++ b/Documentation/vm/uksm.txt -@@ -0,0 +1,61 @@ -+The Ultra Kernel Samepage Merging feature -+---------------------------------------------- -+/* -+ * Ultra KSM. Copyright (C) 2011-2012 Nai Xia -+ * -+ * This is an improvement upon KSM. Some basic data structures and routines -+ * are borrowed from ksm.c . -+ * -+ * Its new features: -+ * 1. Full system scan: -+ * It automatically scans all user processes' anonymous VMAs. Kernel-user -+ * interaction to submit a memory area to KSM is no longer needed. -+ * -+ * 2. Rich area detection: -+ * It automatically detects rich areas containing abundant duplicated -+ * pages based. Rich areas are given a full scan speed. Poor areas are -+ * sampled at a reasonable speed with very low CPU consumption. -+ * -+ * 3. Ultra Per-page scan speed improvement: -+ * A new hash algorithm is proposed. As a result, on a machine with -+ * Core(TM)2 Quad Q9300 CPU in 32-bit mode and 800MHZ DDR2 main memory, it -+ * can scan memory areas that does not contain duplicated pages at speed of -+ * 627MB/sec ~ 2445MB/sec and can merge duplicated areas at speed of -+ * 477MB/sec ~ 923MB/sec. -+ * -+ * 4. Thrashing area avoidance: -+ * Thrashing area(an VMA that has frequent Ksm page break-out) can be -+ * filtered out. My benchmark shows it's more efficient than KSM's per-page -+ * hash value based volatile page detection. -+ * -+ * -+ * 5. Misc changes upon KSM: -+ * * It has a fully x86-opitmized memcmp dedicated for 4-byte-aligned page -+ * comparison. 
It's much faster than default C version on x86. -+ * * rmap_item now has an struct *page member to loosely cache a -+ * address-->page mapping, which reduces too much time-costly -+ * follow_page(). -+ * * The VMA creation/exit procedures are hooked to let the Ultra KSM know. -+ * * try_to_merge_two_pages() now can revert a pte if it fails. No break_ -+ * ksm is needed for this case. -+ * -+ * 6. Full Zero Page consideration(contributed by Figo Zhang) -+ * Now uksmd consider full zero pages as special pages and merge them to an -+ * special unswappable uksm zero page. -+ */ -+ -+ChangeLog: -+ -+2012-05-05 The creation of this Doc -+2012-05-08 UKSM 0.1.1.1 libc crash bug fix, api clean up, doc clean up. -+2012-05-28 UKSM 0.1.1.2 bug fix release -+2012-06-26 UKSM 0.1.2-beta1 first beta release for 0.1.2 -+2012-07-2 UKSM 0.1.2-beta2 -+2012-07-10 UKSM 0.1.2-beta3 -+2012-07-26 UKSM 0.1.2 Fine grained speed control, more scan optimization. -+2012-10-13 UKSM 0.1.2.1 Bug fixes. -+2012-12-31 UKSM 0.1.2.2 Minor bug fixes. -+2014-07-02 UKSM 0.1.2.3 Fix a " __this_cpu_read() in preemptible bug". -+2015-04-22 UKSM 0.1.2.4 Fix a race condition that can sometimes trigger anonying warnings. -+2016-09-10 UKSM 0.1.2.5 Fix a bug in dedup ratio calculation. -+2017-02-26 UKSM 0.1.2.6 Fix a bug in hugetlbpage handling and a race bug with page migration. -diff --git a/fs/exec.c b/fs/exec.c -index 555e93c7dec8..7e1cfd568e6c 100644 ---- a/fs/exec.c -+++ b/fs/exec.c -@@ -63,6 +63,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -1384,6 +1385,7 @@ void setup_new_exec(struct linux_binprm * bprm) - /* An exec changes our domain. We are no longer part of the thread - group */ - current->self_exec_id++; -+ - flush_signal_handlers(current, 0); - } - EXPORT_SYMBOL(setup_new_exec); -diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c -index 8c1f1bb1a5ce..62e28cf10bbf 100644 ---- a/fs/proc/meminfo.c -+++ b/fs/proc/meminfo.c -@@ -105,7 +105,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v) - global_zone_page_state(NR_KERNEL_STACK_KB)); - show_val_kb(m, "PageTables: ", - global_zone_page_state(NR_PAGETABLE)); -- -+#ifdef CONFIG_UKSM -+ show_val_kb(m, "KsmZeroPages: ", -+ global_zone_page_state(NR_UKSM_ZERO_PAGES)); -+#endif - show_val_kb(m, "NFS_Unstable: ", - global_node_page_state(NR_UNSTABLE_NFS)); - show_val_kb(m, "Bounce: ", -diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h -index 818691846c90..b2169f37c696 100644 ---- a/include/asm-generic/pgtable.h -+++ b/include/asm-generic/pgtable.h -@@ -855,12 +855,25 @@ extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, - extern void untrack_pfn_moved(struct vm_area_struct *vma); - #endif - -+#ifdef CONFIG_UKSM -+static inline int is_uksm_zero_pfn(unsigned long pfn) -+{ -+ extern unsigned long uksm_zero_pfn; -+ return pfn == uksm_zero_pfn; -+} -+#else -+static inline int is_uksm_zero_pfn(unsigned long pfn) -+{ -+ return 0; -+} -+#endif -+ - #ifdef __HAVE_COLOR_ZERO_PAGE - static inline int is_zero_pfn(unsigned long pfn) - { - extern unsigned long zero_pfn; - unsigned long offset_from_zero_pfn = pfn - zero_pfn; -- return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT); -+ return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT) || is_uksm_zero_pfn(pfn); - } - - #define my_zero_pfn(addr) page_to_pfn(ZERO_PAGE(addr)) -@@ -869,7 +882,7 @@ static inline int is_zero_pfn(unsigned long pfn) - static inline int is_zero_pfn(unsigned long pfn) - { - extern unsigned long zero_pfn; -- return pfn 
== zero_pfn; -+ return (pfn == zero_pfn) || (is_uksm_zero_pfn(pfn)); - } - - static inline unsigned long my_zero_pfn(unsigned long addr) -diff --git a/include/linux/ksm.h b/include/linux/ksm.h -index e48b1e453ff5..095d59310ce0 100644 ---- a/include/linux/ksm.h -+++ b/include/linux/ksm.h -@@ -1,4 +1,4 @@ --/* SPDX-License-Identifier: GPL-2.0 */ -+/* SPDX-License-Identifier: GPL-3.0 */ - #ifndef __LINUX_KSM_H - #define __LINUX_KSM_H - /* -@@ -21,20 +21,16 @@ struct mem_cgroup; - #ifdef CONFIG_KSM - int ksm_madvise(struct vm_area_struct *vma, unsigned long start, - unsigned long end, int advice, unsigned long *vm_flags); --int __ksm_enter(struct mm_struct *mm); --void __ksm_exit(struct mm_struct *mm); - --static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm) -+static inline struct stable_node *page_stable_node(struct page *page) - { -- if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags)) -- return __ksm_enter(mm); -- return 0; -+ return PageKsm(page) ? page_rmapping(page) : NULL; - } - --static inline void ksm_exit(struct mm_struct *mm) -+static inline void set_page_stable_node(struct page *page, -+ struct stable_node *stable_node) - { -- if (test_bit(MMF_VM_MERGEABLE, &mm->flags)) -- __ksm_exit(mm); -+ page->mapping = (void *)((unsigned long)stable_node | PAGE_MAPPING_KSM); - } - - /* -@@ -56,6 +52,33 @@ void ksm_migrate_page(struct page *newpage, struct page *oldpage); - bool reuse_ksm_page(struct page *page, - struct vm_area_struct *vma, unsigned long address); - -+#ifdef CONFIG_KSM_LEGACY -+int __ksm_enter(struct mm_struct *mm); -+void __ksm_exit(struct mm_struct *mm); -+static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm) -+{ -+ if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags)) -+ return __ksm_enter(mm); -+ return 0; -+} -+ -+static inline void ksm_exit(struct mm_struct *mm) -+{ -+ if (test_bit(MMF_VM_MERGEABLE, &mm->flags)) -+ __ksm_exit(mm); -+} -+ -+#elif defined(CONFIG_UKSM) -+static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm) -+{ -+ return 0; -+} -+ -+static inline void ksm_exit(struct mm_struct *mm) -+{ -+} -+#endif /* !CONFIG_UKSM */ -+ - #else /* !CONFIG_KSM */ - - static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm) -@@ -96,4 +119,6 @@ static inline bool reuse_ksm_page(struct page *page, - #endif /* CONFIG_MMU */ - #endif /* !CONFIG_KSM */ - -+#include -+ - #endif /* __LINUX_KSM_H */ -diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h -index 270aa8fd2800..8e1eaf6a2de3 100644 ---- a/include/linux/mm_types.h -+++ b/include/linux/mm_types.h -@@ -353,6 +353,9 @@ struct vm_area_struct { - struct mempolicy *vm_policy; /* NUMA policy for the VMA */ - #endif - struct vm_userfaultfd_ctx vm_userfaultfd_ctx; -+#ifdef CONFIG_UKSM -+ struct vma_slot *uksm_vma_slot; -+#endif - } __randomize_layout; - - struct core_thread { -diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h -index bda20282746b..dab82e7cc3df 100644 ---- a/include/linux/mmzone.h -+++ b/include/linux/mmzone.h -@@ -206,6 +206,9 @@ enum zone_stat_item { - NR_ZSPAGES, /* allocated in zsmalloc */ - #endif - NR_FREE_CMA_PAGES, -+#ifdef CONFIG_UKSM -+ NR_UKSM_ZERO_PAGES, -+#endif - NR_VM_ZONE_STAT_ITEMS }; - - enum node_stat_item { -diff --git a/include/linux/sradix-tree.h b/include/linux/sradix-tree.h -new file mode 100644 -index 000000000000..d71edba6b63f ---- /dev/null -+++ b/include/linux/sradix-tree.h -@@ -0,0 +1,77 @@ -+#ifndef _LINUX_SRADIX_TREE_H -+#define _LINUX_SRADIX_TREE_H -+ -+ -+#define INIT_SRADIX_TREE(root, mask) \ -+do 
{ \ -+ (root)->height = 0; \ -+ (root)->gfp_mask = (mask); \ -+ (root)->rnode = NULL; \ -+} while (0) -+ -+#define ULONG_BITS (sizeof(unsigned long) * 8) -+#define SRADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long)) -+//#define SRADIX_TREE_MAP_SHIFT 6 -+//#define SRADIX_TREE_MAP_SIZE (1UL << SRADIX_TREE_MAP_SHIFT) -+//#define SRADIX_TREE_MAP_MASK (SRADIX_TREE_MAP_SIZE-1) -+ -+struct sradix_tree_node { -+ unsigned int height; /* Height from the bottom */ -+ unsigned int count; -+ unsigned int fulls; /* Number of full sublevel trees */ -+ struct sradix_tree_node *parent; -+ void *stores[0]; -+}; -+ -+/* A simple radix tree implementation */ -+struct sradix_tree_root { -+ unsigned int height; -+ struct sradix_tree_node *rnode; -+ -+ /* Where found to have available empty stores in its sublevels */ -+ struct sradix_tree_node *enter_node; -+ unsigned int shift; -+ unsigned int stores_size; -+ unsigned int mask; -+ unsigned long min; /* The first hole index */ -+ unsigned long num; -+ //unsigned long *height_to_maxindex; -+ -+ /* How the node is allocated and freed. */ -+ struct sradix_tree_node *(*alloc)(void); -+ void (*free)(struct sradix_tree_node *node); -+ -+ /* When a new node is added and removed */ -+ void (*extend)(struct sradix_tree_node *parent, struct sradix_tree_node *child); -+ void (*assign)(struct sradix_tree_node *node, unsigned int index, void *item); -+ void (*rm)(struct sradix_tree_node *node, unsigned int offset); -+}; -+ -+struct sradix_tree_path { -+ struct sradix_tree_node *node; -+ int offset; -+}; -+ -+static inline -+void init_sradix_tree_root(struct sradix_tree_root *root, unsigned long shift) -+{ -+ root->height = 0; -+ root->rnode = NULL; -+ root->shift = shift; -+ root->stores_size = 1UL << shift; -+ root->mask = root->stores_size - 1; -+} -+ -+ -+extern void *sradix_tree_next(struct sradix_tree_root *root, -+ struct sradix_tree_node *node, unsigned long index, -+ int (*iter)(void *, unsigned long)); -+ -+extern int sradix_tree_enter(struct sradix_tree_root *root, void **item, int num); -+ -+extern void sradix_tree_delete_from_leaf(struct sradix_tree_root *root, -+ struct sradix_tree_node *node, unsigned long index); -+ -+extern void *sradix_tree_lookup(struct sradix_tree_root *root, unsigned long index); -+ -+#endif /* _LINUX_SRADIX_TREE_H */ -diff --git a/include/linux/uksm.h b/include/linux/uksm.h -new file mode 100644 -index 000000000000..bb8651f534f2 ---- /dev/null -+++ b/include/linux/uksm.h -@@ -0,0 +1,149 @@ -+#ifndef __LINUX_UKSM_H -+#define __LINUX_UKSM_H -+/* -+ * Memory merging support. -+ * -+ * This code enables dynamic sharing of identical pages found in different -+ * memory areas, even if they are not shared by fork(). -+ */ -+ -+/* if !CONFIG_UKSM this file should not be compiled at all. 
*/ -+#ifdef CONFIG_UKSM -+ -+#include -+#include -+#include -+#include -+#include -+ -+extern unsigned long zero_pfn __read_mostly; -+extern unsigned long uksm_zero_pfn __read_mostly; -+extern struct page *empty_uksm_zero_page; -+ -+/* must be done before linked to mm */ -+extern void uksm_vma_add_new(struct vm_area_struct *vma); -+extern void uksm_remove_vma(struct vm_area_struct *vma); -+ -+#define UKSM_SLOT_NEED_SORT (1 << 0) -+#define UKSM_SLOT_NEED_RERAND (1 << 1) -+#define UKSM_SLOT_SCANNED (1 << 2) /* It's scanned in this round */ -+#define UKSM_SLOT_FUL_SCANNED (1 << 3) -+#define UKSM_SLOT_IN_UKSM (1 << 4) -+ -+struct vma_slot { -+ struct sradix_tree_node *snode; -+ unsigned long sindex; -+ -+ struct list_head slot_list; -+ unsigned long fully_scanned_round; -+ unsigned long dedup_num; -+ unsigned long pages_scanned; -+ unsigned long this_sampled; -+ unsigned long last_scanned; -+ unsigned long pages_to_scan; -+ struct scan_rung *rung; -+ struct page **rmap_list_pool; -+ unsigned int *pool_counts; -+ unsigned long pool_size; -+ struct vm_area_struct *vma; -+ struct mm_struct *mm; -+ unsigned long ctime_j; -+ unsigned long pages; -+ unsigned long flags; -+ unsigned long pages_cowed; /* pages cowed this round */ -+ unsigned long pages_merged; /* pages merged this round */ -+ unsigned long pages_bemerged; -+ -+ /* when it has page merged in this eval round */ -+ struct list_head dedup_list; -+}; -+ -+static inline void uksm_unmap_zero_page(pte_t pte) -+{ -+ if (pte_pfn(pte) == uksm_zero_pfn) -+ __dec_zone_page_state(empty_uksm_zero_page, NR_UKSM_ZERO_PAGES); -+} -+ -+static inline void uksm_map_zero_page(pte_t pte) -+{ -+ if (pte_pfn(pte) == uksm_zero_pfn) -+ __inc_zone_page_state(empty_uksm_zero_page, NR_UKSM_ZERO_PAGES); -+} -+ -+static inline void uksm_cow_page(struct vm_area_struct *vma, struct page *page) -+{ -+ if (vma->uksm_vma_slot && PageKsm(page)) -+ vma->uksm_vma_slot->pages_cowed++; -+} -+ -+static inline void uksm_cow_pte(struct vm_area_struct *vma, pte_t pte) -+{ -+ if (vma->uksm_vma_slot && pte_pfn(pte) == uksm_zero_pfn) -+ vma->uksm_vma_slot->pages_cowed++; -+} -+ -+static inline int uksm_flags_can_scan(unsigned long vm_flags) -+{ -+#ifdef VM_SAO -+ if (vm_flags & VM_SAO) -+ return 0; -+#endif -+ -+ return !(vm_flags & (VM_PFNMAP | VM_IO | VM_DONTEXPAND | -+ VM_HUGETLB | VM_MIXEDMAP | VM_SHARED -+ | VM_MAYSHARE | VM_GROWSUP | VM_GROWSDOWN)); -+} -+ -+static inline void uksm_vm_flags_mod(unsigned long *vm_flags_p) -+{ -+ if (uksm_flags_can_scan(*vm_flags_p)) -+ *vm_flags_p |= VM_MERGEABLE; -+} -+ -+/* -+ * Just a wrapper for BUG_ON for where ksm_zeropage must not be. TODO: it will -+ * be removed when uksm zero page patch is stable enough. 
-+ */ -+static inline void uksm_bugon_zeropage(pte_t pte) -+{ -+ BUG_ON(pte_pfn(pte) == uksm_zero_pfn); -+} -+#else -+static inline void uksm_vma_add_new(struct vm_area_struct *vma) -+{ -+} -+ -+static inline void uksm_remove_vma(struct vm_area_struct *vma) -+{ -+} -+ -+static inline void uksm_unmap_zero_page(pte_t pte) -+{ -+} -+ -+static inline void uksm_map_zero_page(pte_t pte) -+{ -+} -+ -+static inline void uksm_cow_page(struct vm_area_struct *vma, struct page *page) -+{ -+} -+ -+static inline void uksm_cow_pte(struct vm_area_struct *vma, pte_t pte) -+{ -+} -+ -+static inline int uksm_flags_can_scan(unsigned long vm_flags) -+{ -+ return 0; -+} -+ -+static inline void uksm_vm_flags_mod(unsigned long *vm_flags_p) -+{ -+} -+ -+static inline void uksm_bugon_zeropage(pte_t pte) -+{ -+} -+#endif /* !CONFIG_UKSM */ -+#endif /* __LINUX_UKSM_H */ -diff --git a/kernel/fork.c b/kernel/fork.c -index 13b38794efb5..acb562745f1c 100644 ---- a/kernel/fork.c -+++ b/kernel/fork.c -@@ -595,7 +595,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, - __vma_link_rb(mm, tmp, rb_link, rb_parent); - rb_link = &tmp->vm_rb.rb_right; - rb_parent = &tmp->vm_rb; -- -+ uksm_vma_add_new(tmp); - mm->map_count++; - if (!(tmp->vm_flags & VM_WIPEONFORK)) - retval = copy_page_range(mm, oldmm, mpnt); -diff --git a/lib/Makefile b/lib/Makefile -index c5892807e06f..4b0d37da9fa0 100644 ---- a/lib/Makefile -+++ b/lib/Makefile -@@ -25,7 +25,7 @@ CFLAGS_string.o := $(call cc-option, -fno-stack-protector) - endif - - lib-y := ctype.o string.o vsprintf.o cmdline.o \ -- rbtree.o radix-tree.o timerqueue.o xarray.o \ -+ rbtree.o radix-tree.o sradix-tree.o timerqueue.o xarray.o \ - idr.o extable.o \ - sha1.o chacha.o irq_regs.o argv_split.o \ - flex_proportions.o ratelimit.o show_mem.o \ -diff --git a/lib/sradix-tree.c b/lib/sradix-tree.c -new file mode 100644 -index 000000000000..ab21e6309b93 ---- /dev/null -+++ b/lib/sradix-tree.c -@@ -0,0 +1,476 @@ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+static inline int sradix_node_full(struct sradix_tree_root *root, struct sradix_tree_node *node) -+{ -+ return node->fulls == root->stores_size || -+ (node->height == 1 && node->count == root->stores_size); -+} -+ -+/* -+ * Extend a sradix tree so it can store key @index. -+ */ -+static int sradix_tree_extend(struct sradix_tree_root *root, unsigned long index) -+{ -+ struct sradix_tree_node *node; -+ unsigned int height; -+ -+ if (unlikely(root->rnode == NULL)) { -+ if (!(node = root->alloc())) -+ return -ENOMEM; -+ -+ node->height = 1; -+ root->rnode = node; -+ root->height = 1; -+ } -+ -+ /* Figure out what the height should be. */ -+ height = root->height; -+ index >>= root->shift * height; -+ -+ while (index) { -+ index >>= root->shift; -+ height++; -+ } -+ -+ while (height > root->height) { -+ unsigned int newheight; -+ -+ if (!(node = root->alloc())) -+ return -ENOMEM; -+ -+ /* Increase the height. */ -+ node->stores[0] = root->rnode; -+ root->rnode->parent = node; -+ if (root->extend) -+ root->extend(node, root->rnode); -+ -+ newheight = root->height + 1; -+ node->height = newheight; -+ node->count = 1; -+ if (sradix_node_full(root, root->rnode)) -+ node->fulls = 1; -+ -+ root->rnode = node; -+ root->height = newheight; -+ } -+ -+ return 0; -+} -+ -+/* -+ * Search the next item from the current node, that is not NULL -+ * and can satify root->iter(). 
-+ */ -+void *sradix_tree_next(struct sradix_tree_root *root, -+ struct sradix_tree_node *node, unsigned long index, -+ int (*iter)(void *item, unsigned long height)) -+{ -+ unsigned long offset; -+ void *item; -+ -+ if (unlikely(node == NULL)) { -+ node = root->rnode; -+ for (offset = 0; offset < root->stores_size; offset++) { -+ item = node->stores[offset]; -+ if (item && (!iter || iter(item, node->height))) -+ break; -+ } -+ -+ if (unlikely(offset >= root->stores_size)) -+ return NULL; -+ -+ if (node->height == 1) -+ return item; -+ else -+ goto go_down; -+ } -+ -+ while (node) { -+ offset = (index & root->mask) + 1; -+ for (; offset < root->stores_size; offset++) { -+ item = node->stores[offset]; -+ if (item && (!iter || iter(item, node->height))) -+ break; -+ } -+ -+ if (offset < root->stores_size) -+ break; -+ -+ node = node->parent; -+ index >>= root->shift; -+ } -+ -+ if (!node) -+ return NULL; -+ -+ while (node->height > 1) { -+go_down: -+ node = item; -+ for (offset = 0; offset < root->stores_size; offset++) { -+ item = node->stores[offset]; -+ if (item && (!iter || iter(item, node->height))) -+ break; -+ } -+ -+ if (unlikely(offset >= root->stores_size)) -+ return NULL; -+ } -+ -+ BUG_ON(offset > root->stores_size); -+ -+ return item; -+} -+ -+/* -+ * Blindly insert the item to the tree. Typically, we reuse the -+ * first empty store item. -+ */ -+int sradix_tree_enter(struct sradix_tree_root *root, void **item, int num) -+{ -+ unsigned long index; -+ unsigned int height; -+ struct sradix_tree_node *node, *tmp = NULL; -+ int offset, offset_saved; -+ void **store = NULL; -+ int error, i, j, shift; -+ -+go_on: -+ index = root->min; -+ -+ if (root->enter_node && !sradix_node_full(root, root->enter_node)) { -+ node = root->enter_node; -+ BUG_ON((index >> (root->shift * root->height))); -+ } else { -+ node = root->rnode; -+ if (node == NULL || (index >> (root->shift * root->height)) -+ || sradix_node_full(root, node)) { -+ error = sradix_tree_extend(root, index); -+ if (error) -+ return error; -+ -+ node = root->rnode; -+ } -+ } -+ -+ -+ height = node->height; -+ shift = (height - 1) * root->shift; -+ offset = (index >> shift) & root->mask; -+ while (shift > 0) { -+ offset_saved = offset; -+ for (; offset < root->stores_size; offset++) { -+ store = &node->stores[offset]; -+ tmp = *store; -+ -+ if (!tmp || !sradix_node_full(root, tmp)) -+ break; -+ } -+ BUG_ON(offset >= root->stores_size); -+ -+ if (offset != offset_saved) { -+ index += (offset - offset_saved) << shift; -+ index &= ~((1UL << shift) - 1); -+ } -+ -+ if (!tmp) { -+ if (!(tmp = root->alloc())) -+ return -ENOMEM; -+ -+ tmp->height = shift / root->shift; -+ *store = tmp; -+ tmp->parent = node; -+ node->count++; -+// if (root->extend) -+// root->extend(node, tmp); -+ } -+ -+ node = tmp; -+ shift -= root->shift; -+ offset = (index >> shift) & root->mask; -+ } -+ -+ BUG_ON(node->height != 1); -+ -+ -+ store = &node->stores[offset]; -+ for (i = 0, j = 0; -+ j < root->stores_size - node->count && -+ i < root->stores_size - offset && j < num; i++) { -+ if (!store[i]) { -+ store[i] = item[j]; -+ if (root->assign) -+ root->assign(node, index + i, item[j]); -+ j++; -+ } -+ } -+ -+ node->count += j; -+ root->num += j; -+ num -= j; -+ -+ while (sradix_node_full(root, node)) { -+ node = node->parent; -+ if (!node) -+ break; -+ -+ node->fulls++; -+ } -+ -+ if (unlikely(!node)) { -+ /* All nodes are full */ -+ root->min = 1 << (root->height * root->shift); -+ root->enter_node = NULL; -+ } else { -+ root->min = index + i - 1; -+ 
root->min |= (1UL << (node->height - 1)) - 1; -+ root->min++; -+ root->enter_node = node; -+ } -+ -+ if (num) { -+ item += j; -+ goto go_on; -+ } -+ -+ return 0; -+} -+ -+ -+/** -+ * sradix_tree_shrink - shrink height of a sradix tree to minimal -+ * @root sradix tree root -+ * -+ */ -+static inline void sradix_tree_shrink(struct sradix_tree_root *root) -+{ -+ /* try to shrink tree height */ -+ while (root->height > 1) { -+ struct sradix_tree_node *to_free = root->rnode; -+ -+ /* -+ * The candidate node has more than one child, or its child -+ * is not at the leftmost store, we cannot shrink. -+ */ -+ if (to_free->count != 1 || !to_free->stores[0]) -+ break; -+ -+ root->rnode = to_free->stores[0]; -+ root->rnode->parent = NULL; -+ root->height--; -+ if (unlikely(root->enter_node == to_free)) -+ root->enter_node = NULL; -+ root->free(to_free); -+ } -+} -+ -+/* -+ * Del the item on the known leaf node and index -+ */ -+void sradix_tree_delete_from_leaf(struct sradix_tree_root *root, -+ struct sradix_tree_node *node, unsigned long index) -+{ -+ unsigned int offset; -+ struct sradix_tree_node *start, *end; -+ -+ BUG_ON(node->height != 1); -+ -+ start = node; -+ while (node && !(--node->count)) -+ node = node->parent; -+ -+ end = node; -+ if (!node) { -+ root->rnode = NULL; -+ root->height = 0; -+ root->min = 0; -+ root->num = 0; -+ root->enter_node = NULL; -+ } else { -+ offset = (index >> (root->shift * (node->height - 1))) & root->mask; -+ if (root->rm) -+ root->rm(node, offset); -+ node->stores[offset] = NULL; -+ root->num--; -+ if (root->min > index) { -+ root->min = index; -+ root->enter_node = node; -+ } -+ } -+ -+ if (start != end) { -+ do { -+ node = start; -+ start = start->parent; -+ if (unlikely(root->enter_node == node)) -+ root->enter_node = end; -+ root->free(node); -+ } while (start != end); -+ -+ /* -+ * Note that shrink may free "end", so enter_node still need to -+ * be checked inside. -+ */ -+ sradix_tree_shrink(root); -+ } else if (node->count == root->stores_size - 1) { -+ /* It WAS a full leaf node. Update the ancestors */ -+ node = node->parent; -+ while (node) { -+ node->fulls--; -+ if (node->fulls != root->stores_size - 1) -+ break; -+ -+ node = node->parent; -+ } -+ } -+} -+ -+void *sradix_tree_lookup(struct sradix_tree_root *root, unsigned long index) -+{ -+ unsigned int height, offset; -+ struct sradix_tree_node *node; -+ int shift; -+ -+ node = root->rnode; -+ if (node == NULL || (index >> (root->shift * root->height))) -+ return NULL; -+ -+ height = root->height; -+ shift = (height - 1) * root->shift; -+ -+ do { -+ offset = (index >> shift) & root->mask; -+ node = node->stores[offset]; -+ if (!node) -+ return NULL; -+ -+ shift -= root->shift; -+ } while (shift >= 0); -+ -+ return node; -+} -+ -+/* -+ * Return the item if it exists, otherwise create it in place -+ * and return the created item. 
-+ */ -+void *sradix_tree_lookup_create(struct sradix_tree_root *root, -+ unsigned long index, void *(*item_alloc)(void)) -+{ -+ unsigned int height, offset; -+ struct sradix_tree_node *node, *tmp; -+ void *item; -+ int shift, error; -+ -+ if (root->rnode == NULL || (index >> (root->shift * root->height))) { -+ if (item_alloc) { -+ error = sradix_tree_extend(root, index); -+ if (error) -+ return NULL; -+ } else { -+ return NULL; -+ } -+ } -+ -+ node = root->rnode; -+ height = root->height; -+ shift = (height - 1) * root->shift; -+ -+ do { -+ offset = (index >> shift) & root->mask; -+ if (!node->stores[offset]) { -+ if (!(tmp = root->alloc())) -+ return NULL; -+ -+ tmp->height = shift / root->shift; -+ node->stores[offset] = tmp; -+ tmp->parent = node; -+ node->count++; -+ node = tmp; -+ } else { -+ node = node->stores[offset]; -+ } -+ -+ shift -= root->shift; -+ } while (shift > 0); -+ -+ BUG_ON(node->height != 1); -+ offset = index & root->mask; -+ if (node->stores[offset]) { -+ return node->stores[offset]; -+ } else if (item_alloc) { -+ if (!(item = item_alloc())) -+ return NULL; -+ -+ node->stores[offset] = item; -+ -+ /* -+ * NOTE: we do NOT call root->assign here, since this item is -+ * newly created by us having no meaning. Caller can call this -+ * if it's necessary to do so. -+ */ -+ -+ node->count++; -+ root->num++; -+ -+ while (sradix_node_full(root, node)) { -+ node = node->parent; -+ if (!node) -+ break; -+ -+ node->fulls++; -+ } -+ -+ if (unlikely(!node)) { -+ /* All nodes are full */ -+ root->min = 1 << (root->height * root->shift); -+ } else { -+ if (root->min == index) { -+ root->min |= (1UL << (node->height - 1)) - 1; -+ root->min++; -+ root->enter_node = node; -+ } -+ } -+ -+ return item; -+ } else { -+ return NULL; -+ } -+ -+} -+ -+int sradix_tree_delete(struct sradix_tree_root *root, unsigned long index) -+{ -+ unsigned int height, offset; -+ struct sradix_tree_node *node; -+ int shift; -+ -+ node = root->rnode; -+ if (node == NULL || (index >> (root->shift * root->height))) -+ return -ENOENT; -+ -+ height = root->height; -+ shift = (height - 1) * root->shift; -+ -+ do { -+ offset = (index >> shift) & root->mask; -+ node = node->stores[offset]; -+ if (!node) -+ return -ENOENT; -+ -+ shift -= root->shift; -+ } while (shift > 0); -+ -+ offset = index & root->mask; -+ if (!node->stores[offset]) -+ return -ENOENT; -+ -+ sradix_tree_delete_from_leaf(root, node, index); -+ -+ return 0; -+} -diff --git a/mm/Kconfig b/mm/Kconfig -index a5dae9a7eb51..eda3a78226b0 100644 ---- a/mm/Kconfig -+++ b/mm/Kconfig -@@ -299,6 +299,32 @@ config KSM - See Documentation/vm/ksm.rst for more information: KSM is inactive - until a program has madvised that an area is MADV_MERGEABLE, and - root has set /sys/kernel/mm/ksm/run to 1 (if CONFIG_SYSFS is set). -+choice -+ prompt "Choose UKSM/KSM strategy" -+ default UKSM -+ depends on KSM -+ help -+ This option allows to select a UKSM/KSM stragety. -+ -+config UKSM -+ bool "Ultra-KSM for page merging" -+ depends on KSM -+ help -+ UKSM is inspired by the Linux kernel project \u2014 KSM(Kernel Same -+ page Merging), but with a fundamentally rewritten core algorithm. With -+ an advanced algorithm, UKSM now can transparently scans all anonymously -+ mapped user space applications with an significantly improved scan speed -+ and CPU efficiency. Since KVM is friendly to KSM, KVM can also benefit from -+ UKSM. Now UKSM has its first stable release and first real world enterprise user. -+ For more information, please goto its project page. 
-+ (www.kerneldedup.org) -+ -+config KSM_LEGACY -+ bool "Legacy KSM implementation" -+ depends on KSM -+ help -+ The legacy KSM implementation from Red Hat. -+endchoice - - config DEFAULT_MMAP_MIN_ADDR - int "Low address space to protect from user allocation" -diff --git a/mm/Makefile b/mm/Makefile -index d996846697ef..786cb9fad4b9 100644 ---- a/mm/Makefile -+++ b/mm/Makefile -@@ -66,7 +66,8 @@ obj-$(CONFIG_SPARSEMEM) += sparse.o - obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o - obj-$(CONFIG_SLOB) += slob.o - obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o --obj-$(CONFIG_KSM) += ksm.o -+obj-$(CONFIG_KSM_LEGACY) += ksm.o -+obj-$(CONFIG_UKSM) += uksm.o - obj-$(CONFIG_PAGE_POISONING) += page_poison.o - obj-$(CONFIG_SLAB) += slab.o - obj-$(CONFIG_SLUB) += slub.o -diff --git a/mm/ksm.c b/mm/ksm.c -index 7905934cd3ad..3e9d0064eaf2 100644 ---- a/mm/ksm.c -+++ b/mm/ksm.c -@@ -857,17 +857,6 @@ static int unmerge_ksm_pages(struct vm_area_struct *vma, - return err; - } - --static inline struct stable_node *page_stable_node(struct page *page) --{ -- return PageKsm(page) ? page_rmapping(page) : NULL; --} -- --static inline void set_page_stable_node(struct page *page, -- struct stable_node *stable_node) --{ -- page->mapping = (void *)((unsigned long)stable_node | PAGE_MAPPING_KSM); --} -- - #ifdef CONFIG_SYSFS - /* - * Only called through the sysfs control interface: -diff --git a/mm/memory.c b/mm/memory.c -index b1ca51a079f2..98a57e508b9b 100644 ---- a/mm/memory.c -+++ b/mm/memory.c -@@ -130,6 +130,25 @@ EXPORT_SYMBOL(zero_pfn); - - unsigned long highest_memmap_pfn __read_mostly; - -+#ifdef CONFIG_UKSM -+unsigned long uksm_zero_pfn __read_mostly; -+EXPORT_SYMBOL_GPL(uksm_zero_pfn); -+struct page *empty_uksm_zero_page; -+ -+static int __init setup_uksm_zero_page(void) -+{ -+ empty_uksm_zero_page = alloc_pages(__GFP_ZERO & ~__GFP_MOVABLE, 0); -+ if (!empty_uksm_zero_page) -+ panic("Oh boy, that early out of memory?"); -+ -+ SetPageReserved(empty_uksm_zero_page); -+ uksm_zero_pfn = page_to_pfn(empty_uksm_zero_page); -+ -+ return 0; -+} -+core_initcall(setup_uksm_zero_page); -+#endif -+ - /* - * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init() - */ -@@ -141,6 +160,7 @@ static int __init init_zero_pfn(void) - core_initcall(init_zero_pfn); - - -+ - #if defined(SPLIT_RSS_COUNTING) - - void sync_mm_rss(struct mm_struct *mm) -@@ -772,8 +792,13 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, - get_page(page); - page_dup_rmap(page, false); - rss[mm_counter(page)]++; -+ -+ /* Should return NULL in vm_normal_page() */ -+ uksm_bugon_zeropage(pte); - } else if (pte_devmap(pte)) { - page = pte_page(pte); -+ } else { -+ uksm_map_zero_page(pte); - } - - out_set_pte: -@@ -1046,8 +1071,10 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, - ptent = ptep_get_and_clear_full(mm, addr, pte, - tlb->fullmm); - tlb_remove_tlb_entry(tlb, pte, addr); -- if (unlikely(!page)) -+ if (unlikely(!page)) { -+ uksm_unmap_zero_page(ptent); - continue; -+ } - - if (!PageAnon(page)) { - if (pte_dirty(ptent)) { -@@ -2169,8 +2196,10 @@ static inline void cow_user_page(struct page *dst, struct page *src, unsigned lo - clear_page(kaddr); - kunmap_atomic(kaddr); - flush_dcache_page(dst); -- } else -+ } else { - copy_user_highpage(dst, src, va, vma); -+ uksm_cow_page(vma, src); -+ } - } - - static gfp_t __get_fault_gfp_mask(struct vm_area_struct *vma) -@@ -2322,6 +2351,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) - vmf->address); - if (!new_page) - goto oom; -+ uksm_cow_pte(vma, 
vmf->orig_pte); - } else { - new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, - vmf->address); -@@ -2351,7 +2381,9 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) - mm_counter_file(old_page)); - inc_mm_counter_fast(mm, MM_ANONPAGES); - } -+ uksm_bugon_zeropage(vmf->orig_pte); - } else { -+ uksm_unmap_zero_page(vmf->orig_pte); - inc_mm_counter_fast(mm, MM_ANONPAGES); - } - flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte)); -diff --git a/mm/mmap.c b/mm/mmap.c -index a7d8c84d19b7..797582240047 100644 ---- a/mm/mmap.c -+++ b/mm/mmap.c -@@ -46,6 +46,7 @@ - #include - #include - #include -+#include - #include - - #include -@@ -184,6 +185,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma) - if (vma->vm_file) - fput(vma->vm_file); - mpol_put(vma_policy(vma)); -+ uksm_remove_vma(vma); - vm_area_free(vma); - return next; - } -@@ -736,9 +738,16 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start, - long adjust_next = 0; - int remove_next = 0; - -+/* -+ * to avoid deadlock, ksm_remove_vma must be done before any spin_lock is -+ * acquired -+ */ -+ uksm_remove_vma(vma); -+ - if (next && !insert) { - struct vm_area_struct *exporter = NULL, *importer = NULL; - -+ uksm_remove_vma(next); - if (end >= next->vm_end) { - /* - * vma expands, overlapping all the next, and -@@ -871,6 +880,7 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start, - end_changed = true; - } - vma->vm_pgoff = pgoff; -+ - if (adjust_next) { - next->vm_start += adjust_next << PAGE_SHIFT; - next->vm_pgoff += adjust_next; -@@ -976,6 +986,7 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start, - if (remove_next == 2) { - remove_next = 1; - end = next->vm_end; -+ uksm_remove_vma(next); - goto again; - } - else if (next) -@@ -1002,10 +1013,14 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start, - */ - VM_WARN_ON(mm->highest_vm_end != vm_end_gap(vma)); - } -+ } else { -+ if (next && !insert) -+ uksm_vma_add_new(next); - } - if (insert && file) - uprobe_mmap(insert); - -+ uksm_vma_add_new(vma); - validate_mm(mm); - - return 0; -@@ -1465,6 +1480,9 @@ unsigned long do_mmap(struct file *file, unsigned long addr, - vm_flags |= calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) | - mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; - -+ /* If uksm is enabled, we add VM_MERGEABLE to new VMAs. */ -+ uksm_vm_flags_mod(&vm_flags); -+ - if (flags & MAP_LOCKED) - if (!can_do_mlock()) - return -EPERM; -@@ -1835,6 +1853,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, - allow_write_access(file); - } - file = vma->vm_file; -+ uksm_vma_add_new(vma); - out: - perf_event_mmap(vma); - -@@ -1877,6 +1896,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, - if (vm_flags & VM_DENYWRITE) - allow_write_access(file); - free_vma: -+ uksm_remove_vma(vma); - vm_area_free(vma); - unacct_error: - if (charged) -@@ -2706,6 +2726,8 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma, - else - err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new); - -+ uksm_vma_add_new(new); -+ - /* Success. 
*/ - if (!err) - return 0; -@@ -3011,6 +3033,7 @@ static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long fla - if ((flags & (~VM_EXEC)) != 0) - return -EINVAL; - flags |= VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags; -+ uksm_vm_flags_mod(&flags); - - error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED); - if (offset_in_page(error)) -@@ -3061,6 +3084,7 @@ static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long fla - vma->vm_flags = flags; - vma->vm_page_prot = vm_get_page_prot(flags); - vma_link(mm, vma, prev, rb_link, rb_parent); -+ uksm_vma_add_new(vma); - out: - perf_event_mmap(vma); - mm->total_vm += len >> PAGE_SHIFT; -@@ -3138,6 +3162,12 @@ void exit_mmap(struct mm_struct *mm) - up_write(&mm->mmap_sem); - } - -+ /* -+ * Taking write lock on mmap_sem does not harm others, -+ * but it's crucial for uksm to avoid races. -+ */ -+ down_write(&mm->mmap_sem); -+ - if (mm->locked_vm) { - vma = mm->mmap; - while (vma) { -@@ -3172,6 +3202,11 @@ void exit_mmap(struct mm_struct *mm) - vma = remove_vma(vma); - } - vm_unacct_memory(nr_accounted); -+ -+ mm->mmap = NULL; -+ mm->mm_rb = RB_ROOT; -+ vmacache_invalidate(mm); -+ up_write(&mm->mmap_sem); - } - - /* Insert vm structure into process list sorted by address -@@ -3279,6 +3314,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, - new_vma->vm_ops->open(new_vma); - vma_link(mm, new_vma, prev, rb_link, rb_parent); - *need_rmap_locks = false; -+ uksm_vma_add_new(new_vma); - } - return new_vma; - -@@ -3429,6 +3465,7 @@ static struct vm_area_struct *__install_special_mapping( - vm_stat_account(mm, vma->vm_flags, len >> PAGE_SHIFT); - - perf_event_mmap(vma); -+ uksm_vma_add_new(vma); - - return vma; - -diff --git a/mm/uksm.c b/mm/uksm.c -new file mode 100644 -index 000000000000..ef068d5dc307 ---- /dev/null -+++ b/mm/uksm.c -@@ -0,0 +1,5613 @@ -+/* -+ * Ultra KSM. Copyright (C) 2011-2012 Nai Xia -+ * -+ * This is an improvement upon KSM. Some basic data structures and routines -+ * are borrowed from ksm.c . -+ * -+ * Its new features: -+ * 1. Full system scan: -+ * It automatically scans all user processes' anonymous VMAs. Kernel-user -+ * interaction to submit a memory area to KSM is no longer needed. -+ * -+ * 2. Rich area detection: -+ * It automatically detects rich areas containing abundant duplicated -+ * pages based. Rich areas are given a full scan speed. Poor areas are -+ * sampled at a reasonable speed with very low CPU consumption. -+ * -+ * 3. Ultra Per-page scan speed improvement: -+ * A new hash algorithm is proposed. As a result, on a machine with -+ * Core(TM)2 Quad Q9300 CPU in 32-bit mode and 800MHZ DDR2 main memory, it -+ * can scan memory areas that does not contain duplicated pages at speed of -+ * 627MB/sec ~ 2445MB/sec and can merge duplicated areas at speed of -+ * 477MB/sec ~ 923MB/sec. -+ * -+ * 4. Thrashing area avoidance: -+ * Thrashing area(an VMA that has frequent Ksm page break-out) can be -+ * filtered out. My benchmark shows it's more efficient than KSM's per-page -+ * hash value based volatile page detection. -+ * -+ * -+ * 5. Misc changes upon KSM: -+ * * It has a fully x86-opitmized memcmp dedicated for 4-byte-aligned page -+ * comparison. It's much faster than default C version on x86. -+ * * rmap_item now has an struct *page member to loosely cache a -+ * address-->page mapping, which reduces too much time-costly -+ * follow_page(). -+ * * The VMA creation/exit procedures are hooked to let the Ultra KSM know. 
-+ * * try_to_merge_two_pages() now can revert a pte if it fails. No break_ -+ * ksm is needed for this case. -+ * -+ * 6. Full Zero Page consideration(contributed by Figo Zhang) -+ * Now uksmd consider full zero pages as special pages and merge them to an -+ * special unswappable uksm zero page. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include "internal.h" -+ -+#ifdef CONFIG_X86 -+#undef memcmp -+ -+#ifdef CONFIG_X86_32 -+#define memcmp memcmpx86_32 -+/* -+ * Compare 4-byte-aligned address s1 and s2, with length n -+ */ -+int memcmpx86_32(void *s1, void *s2, size_t n) -+{ -+ size_t num = n / 4; -+ register int res; -+ -+ __asm__ __volatile__ -+ ( -+ "testl %3,%3\n\t" -+ "repe; cmpsd\n\t" -+ "je 1f\n\t" -+ "sbbl %0,%0\n\t" -+ "orl $1,%0\n" -+ "1:" -+ : "=&a" (res), "+&S" (s1), "+&D" (s2), "+&c" (num) -+ : "0" (0) -+ : "cc"); -+ -+ return res; -+} -+ -+/* -+ * Check the page is all zero ? -+ */ -+static int is_full_zero(const void *s1, size_t len) -+{ -+ unsigned char same; -+ -+ len /= 4; -+ -+ __asm__ __volatile__ -+ ("repe; scasl;" -+ "sete %0" -+ : "=qm" (same), "+D" (s1), "+c" (len) -+ : "a" (0) -+ : "cc"); -+ -+ return same; -+} -+ -+ -+#elif defined(CONFIG_X86_64) -+#define memcmp memcmpx86_64 -+/* -+ * Compare 8-byte-aligned address s1 and s2, with length n -+ */ -+int memcmpx86_64(void *s1, void *s2, size_t n) -+{ -+ size_t num = n / 8; -+ register int res; -+ -+ __asm__ __volatile__ -+ ( -+ "testq %q3,%q3\n\t" -+ "repe; cmpsq\n\t" -+ "je 1f\n\t" -+ "sbbq %q0,%q0\n\t" -+ "orq $1,%q0\n" -+ "1:" -+ : "=&a" (res), "+&S" (s1), "+&D" (s2), "+&c" (num) -+ : "0" (0) -+ : "cc"); -+ -+ return res; -+} -+ -+static int is_full_zero(const void *s1, size_t len) -+{ -+ unsigned char same; -+ -+ len /= 8; -+ -+ __asm__ __volatile__ -+ ("repe; scasq;" -+ "sete %0" -+ : "=qm" (same), "+D" (s1), "+c" (len) -+ : "a" (0) -+ : "cc"); -+ -+ return same; -+} -+ -+#endif -+#else -+static int is_full_zero(const void *s1, size_t len) -+{ -+ unsigned long *src = s1; -+ int i; -+ -+ len /= sizeof(*src); -+ -+ for (i = 0; i < len; i++) { -+ if (src[i]) -+ return 0; -+ } -+ -+ return 1; -+} -+#endif -+ -+#define UKSM_RUNG_ROUND_FINISHED (1 << 0) -+#define TIME_RATIO_SCALE 10000 -+ -+#define SLOT_TREE_NODE_SHIFT 8 -+#define SLOT_TREE_NODE_STORE_SIZE (1UL << SLOT_TREE_NODE_SHIFT) -+struct slot_tree_node { -+ unsigned long size; -+ struct sradix_tree_node snode; -+ void *stores[SLOT_TREE_NODE_STORE_SIZE]; -+}; -+ -+static struct kmem_cache *slot_tree_node_cachep; -+ -+static struct sradix_tree_node *slot_tree_node_alloc(void) -+{ -+ struct slot_tree_node *p; -+ -+ p = kmem_cache_zalloc(slot_tree_node_cachep, GFP_KERNEL | -+ __GFP_NORETRY | __GFP_NOWARN); -+ if (!p) -+ return NULL; -+ -+ return &p->snode; -+} -+ -+static void slot_tree_node_free(struct sradix_tree_node *node) -+{ -+ struct slot_tree_node *p; -+ -+ p = container_of(node, struct slot_tree_node, snode); -+ kmem_cache_free(slot_tree_node_cachep, p); -+} -+ -+static void slot_tree_node_extend(struct sradix_tree_node *parent, -+ struct sradix_tree_node *child) -+{ -+ struct slot_tree_node *p, *c; -+ -+ p = container_of(parent, struct slot_tree_node, snode); -+ c = container_of(child, struct slot_tree_node, snode); -+ -+ 
p->size += c->size; -+} -+ -+void slot_tree_node_assign(struct sradix_tree_node *node, -+ unsigned int index, void *item) -+{ -+ struct vma_slot *slot = item; -+ struct slot_tree_node *cur; -+ -+ slot->snode = node; -+ slot->sindex = index; -+ -+ while (node) { -+ cur = container_of(node, struct slot_tree_node, snode); -+ cur->size += slot->pages; -+ node = node->parent; -+ } -+} -+ -+void slot_tree_node_rm(struct sradix_tree_node *node, unsigned int offset) -+{ -+ struct vma_slot *slot; -+ struct slot_tree_node *cur; -+ unsigned long pages; -+ -+ if (node->height == 1) { -+ slot = node->stores[offset]; -+ pages = slot->pages; -+ } else { -+ cur = container_of(node->stores[offset], -+ struct slot_tree_node, snode); -+ pages = cur->size; -+ } -+ -+ while (node) { -+ cur = container_of(node, struct slot_tree_node, snode); -+ cur->size -= pages; -+ node = node->parent; -+ } -+} -+ -+unsigned long slot_iter_index; -+int slot_iter(void *item, unsigned long height) -+{ -+ struct slot_tree_node *node; -+ struct vma_slot *slot; -+ -+ if (height == 1) { -+ slot = item; -+ if (slot_iter_index < slot->pages) { -+ /*in this one*/ -+ return 1; -+ } else { -+ slot_iter_index -= slot->pages; -+ return 0; -+ } -+ -+ } else { -+ node = container_of(item, struct slot_tree_node, snode); -+ if (slot_iter_index < node->size) { -+ /*in this one*/ -+ return 1; -+ } else { -+ slot_iter_index -= node->size; -+ return 0; -+ } -+ } -+} -+ -+ -+static inline void slot_tree_init_root(struct sradix_tree_root *root) -+{ -+ init_sradix_tree_root(root, SLOT_TREE_NODE_SHIFT); -+ root->alloc = slot_tree_node_alloc; -+ root->free = slot_tree_node_free; -+ root->extend = slot_tree_node_extend; -+ root->assign = slot_tree_node_assign; -+ root->rm = slot_tree_node_rm; -+} -+ -+void slot_tree_init(void) -+{ -+ slot_tree_node_cachep = kmem_cache_create("slot_tree_node", -+ sizeof(struct slot_tree_node), 0, -+ SLAB_PANIC | SLAB_RECLAIM_ACCOUNT, -+ NULL); -+} -+ -+ -+/* Each rung of this ladder is a list of VMAs having a same scan ratio */ -+struct scan_rung { -+ //struct list_head scanned_list; -+ struct sradix_tree_root vma_root; -+ struct sradix_tree_root vma_root2; -+ -+ struct vma_slot *current_scan; -+ unsigned long current_offset; -+ -+ /* -+ * The initial value for current_offset, it should loop over -+ * [0~ step - 1] to let all slot have its chance to be scanned. -+ */ -+ unsigned long offset_init; -+ unsigned long step; /* dynamic step for current_offset */ -+ unsigned int flags; -+ unsigned long pages_to_scan; -+ //unsigned long fully_scanned_slots; -+ /* -+ * a little bit tricky - if cpu_time_ratio > 0, then the value is the -+ * the cpu time ratio it can spend in rung_i for every scan -+ * period. if < 0, then it is the cpu time ratio relative to the -+ * max cpu percentage user specified. Both in unit of -+ * 1/TIME_RATIO_SCALE -+ */ -+ int cpu_ratio; -+ -+ /* -+ * How long it will take for all slots in this rung to be fully -+ * scanned? If it's zero, we don't care about the cover time: -+ * it's fully scanned. 
-+ */ -+ unsigned int cover_msecs; -+ //unsigned long vma_num; -+ //unsigned long pages; /* Sum of all slot's pages in rung */ -+}; -+ -+/** -+ * node of either the stable or unstale rbtree -+ * -+ */ -+struct tree_node { -+ struct rb_node node; /* link in the main (un)stable rbtree */ -+ struct rb_root sub_root; /* rb_root for sublevel collision rbtree */ -+ u32 hash; -+ unsigned long count; /* TODO: merged with sub_root */ -+ struct list_head all_list; /* all tree nodes in stable/unstable tree */ -+}; -+ -+/** -+ * struct stable_node - node of the stable rbtree -+ * @node: rb node of this ksm page in the stable tree -+ * @hlist: hlist head of rmap_items using this ksm page -+ * @kpfn: page frame number of this ksm page -+ */ -+struct stable_node { -+ struct rb_node node; /* link in sub-rbtree */ -+ struct tree_node *tree_node; /* it's tree node root in stable tree, NULL if it's in hell list */ -+ struct hlist_head hlist; -+ unsigned long kpfn; -+ u32 hash_max; /* if ==0 then it's not been calculated yet */ -+ struct list_head all_list; /* in a list for all stable nodes */ -+}; -+ -+/** -+ * struct node_vma - group rmap_items linked in a same stable -+ * node together. -+ */ -+struct node_vma { -+ union { -+ struct vma_slot *slot; -+ unsigned long key; /* slot is used as key sorted on hlist */ -+ }; -+ struct hlist_node hlist; -+ struct hlist_head rmap_hlist; -+ struct stable_node *head; -+}; -+ -+/** -+ * struct rmap_item - reverse mapping item for virtual addresses -+ * @rmap_list: next rmap_item in mm_slot's singly-linked rmap_list -+ * @anon_vma: pointer to anon_vma for this mm,address, when in stable tree -+ * @mm: the memory structure this rmap_item is pointing into -+ * @address: the virtual address this rmap_item tracks (+ flags in low bits) -+ * @node: rb node of this rmap_item in the unstable tree -+ * @head: pointer to stable_node heading this list in the stable tree -+ * @hlist: link into hlist of rmap_items hanging off that stable_node -+ */ -+struct rmap_item { -+ struct vma_slot *slot; -+ struct page *page; -+ unsigned long address; /* + low bits used for flags below */ -+ unsigned long hash_round; -+ unsigned long entry_index; -+ union { -+ struct {/* when in unstable tree */ -+ struct rb_node node; -+ struct tree_node *tree_node; -+ u32 hash_max; -+ }; -+ struct { /* when in stable tree */ -+ struct node_vma *head; -+ struct hlist_node hlist; -+ struct anon_vma *anon_vma; -+ }; -+ }; -+} __aligned(4); -+ -+struct rmap_list_entry { -+ union { -+ struct rmap_item *item; -+ unsigned long addr; -+ }; -+ /* lowest bit is used for is_addr tag */ -+} __aligned(4); /* 4 aligned to fit in to pages*/ -+ -+ -+/* Basic data structure definition ends */ -+ -+ -+/* -+ * Flags for rmap_item to judge if it's listed in the stable/unstable tree. 
-+ * The flags use the low bits of rmap_item.address -+ */ -+#define UNSTABLE_FLAG 0x1 -+#define STABLE_FLAG 0x2 -+#define get_rmap_addr(x) ((x)->address & PAGE_MASK) -+ -+/* -+ * rmap_list_entry helpers -+ */ -+#define IS_ADDR_FLAG 1 -+#define is_addr(ptr) ((unsigned long)(ptr) & IS_ADDR_FLAG) -+#define set_is_addr(ptr) ((ptr) |= IS_ADDR_FLAG) -+#define get_clean_addr(ptr) (((ptr) & ~(__typeof__(ptr))IS_ADDR_FLAG)) -+ -+ -+/* -+ * High speed caches for frequently allocated and freed structs -+ */ -+static struct kmem_cache *rmap_item_cache; -+static struct kmem_cache *stable_node_cache; -+static struct kmem_cache *node_vma_cache; -+static struct kmem_cache *vma_slot_cache; -+static struct kmem_cache *tree_node_cache; -+#define UKSM_KMEM_CACHE(__struct, __flags) kmem_cache_create("uksm_"#__struct,\ -+ sizeof(struct __struct), __alignof__(struct __struct),\ -+ (__flags), NULL) -+ -+/* Array of all scan_rung, uksm_scan_ladder[0] having the minimum scan ratio */ -+#define SCAN_LADDER_SIZE 4 -+static struct scan_rung uksm_scan_ladder[SCAN_LADDER_SIZE]; -+ -+/* The evaluation rounds uksmd has finished */ -+static unsigned long long uksm_eval_round = 1; -+ -+/* -+ * we add 1 to this var when we consider we should rebuild the whole -+ * unstable tree. -+ */ -+static unsigned long uksm_hash_round = 1; -+ -+/* -+ * How many times the whole memory is scanned. -+ */ -+static unsigned long long fully_scanned_round = 1; -+ -+/* The total number of virtual pages of all vma slots */ -+static u64 uksm_pages_total; -+ -+/* The number of pages has been scanned since the start up */ -+static u64 uksm_pages_scanned; -+ -+static u64 scanned_virtual_pages; -+ -+/* The number of pages has been scanned since last encode_benefit call */ -+static u64 uksm_pages_scanned_last; -+ -+/* If the scanned number is tooo large, we encode it here */ -+static u64 pages_scanned_stored; -+ -+static unsigned long pages_scanned_base; -+ -+/* The number of nodes in the stable tree */ -+static unsigned long uksm_pages_shared; -+ -+/* The number of page slots additionally sharing those nodes */ -+static unsigned long uksm_pages_sharing; -+ -+/* The number of nodes in the unstable tree */ -+static unsigned long uksm_pages_unshared; -+ -+/* -+ * Milliseconds ksmd should sleep between scans, -+ * >= 100ms to be consistent with -+ * scan_time_to_sleep_msec() -+ */ -+static unsigned int uksm_sleep_jiffies; -+ -+/* The real value for the uksmd next sleep */ -+static unsigned int uksm_sleep_real; -+ -+/* Saved value for user input uksm_sleep_jiffies when it's enlarged */ -+static unsigned int uksm_sleep_saved; -+ -+/* Max percentage of cpu utilization ksmd can take to scan in one batch */ -+static unsigned int uksm_max_cpu_percentage; -+ -+static int uksm_cpu_governor; -+ -+static char *uksm_cpu_governor_str[4] = { "full", "medium", "low", "quiet" }; -+ -+struct uksm_cpu_preset_s { -+ int cpu_ratio[SCAN_LADDER_SIZE]; -+ unsigned int cover_msecs[SCAN_LADDER_SIZE]; -+ unsigned int max_cpu; /* percentage */ -+}; -+ -+struct uksm_cpu_preset_s uksm_cpu_preset[4] = { -+ { {20, 40, -2500, -10000}, {1000, 500, 200, 50}, 95}, -+ { {20, 30, -2500, -10000}, {1000, 500, 400, 100}, 50}, -+ { {10, 20, -5000, -10000}, {1500, 1000, 1000, 250}, 20}, -+ { {10, 20, 40, 75}, {2000, 1000, 1000, 1000}, 1}, -+}; -+ -+/* The default value for uksm_ema_page_time if it's not initialized */ -+#define UKSM_PAGE_TIME_DEFAULT 500 -+ -+/*cost to scan one page by expotional moving average in nsecs */ -+static unsigned long uksm_ema_page_time = UKSM_PAGE_TIME_DEFAULT; -+ 
-+/* The expotional moving average alpha weight, in percentage. */ -+#define EMA_ALPHA 20 -+ -+/* -+ * The threshold used to filter out thrashing areas, -+ * If it == 0, filtering is disabled, otherwise it's the percentage up-bound -+ * of the thrashing ratio of all areas. Any area with a bigger thrashing ratio -+ * will be considered as having a zero duplication ratio. -+ */ -+static unsigned int uksm_thrash_threshold = 50; -+ -+/* How much dedup ratio is considered to be abundant*/ -+static unsigned int uksm_abundant_threshold = 10; -+ -+/* All slots having merged pages in this eval round. */ -+struct list_head vma_slot_dedup = LIST_HEAD_INIT(vma_slot_dedup); -+ -+/* How many times the ksmd has slept since startup */ -+static unsigned long long uksm_sleep_times; -+ -+#define UKSM_RUN_STOP 0 -+#define UKSM_RUN_MERGE 1 -+static unsigned int uksm_run = 1; -+ -+static DECLARE_WAIT_QUEUE_HEAD(uksm_thread_wait); -+static DEFINE_MUTEX(uksm_thread_mutex); -+ -+/* -+ * List vma_slot_new is for newly created vma_slot waiting to be added by -+ * ksmd. If one cannot be added(e.g. due to it's too small), it's moved to -+ * vma_slot_noadd. vma_slot_del is the list for vma_slot whose corresponding -+ * VMA has been removed/freed. -+ */ -+struct list_head vma_slot_new = LIST_HEAD_INIT(vma_slot_new); -+struct list_head vma_slot_noadd = LIST_HEAD_INIT(vma_slot_noadd); -+struct list_head vma_slot_del = LIST_HEAD_INIT(vma_slot_del); -+static DEFINE_SPINLOCK(vma_slot_list_lock); -+ -+/* The unstable tree heads */ -+static struct rb_root root_unstable_tree = RB_ROOT; -+ -+/* -+ * All tree_nodes are in a list to be freed at once when unstable tree is -+ * freed after each scan round. -+ */ -+static struct list_head unstable_tree_node_list = -+ LIST_HEAD_INIT(unstable_tree_node_list); -+ -+/* List contains all stable nodes */ -+static struct list_head stable_node_list = LIST_HEAD_INIT(stable_node_list); -+ -+/* -+ * When the hash strength is changed, the stable tree must be delta_hashed and -+ * re-structured. We use two set of below structs to speed up the -+ * re-structuring of stable tree. 
-+ */ -+static struct list_head -+stable_tree_node_list[2] = {LIST_HEAD_INIT(stable_tree_node_list[0]), -+ LIST_HEAD_INIT(stable_tree_node_list[1])}; -+ -+static struct list_head *stable_tree_node_listp = &stable_tree_node_list[0]; -+static struct rb_root root_stable_tree[2] = {RB_ROOT, RB_ROOT}; -+static struct rb_root *root_stable_treep = &root_stable_tree[0]; -+static unsigned long stable_tree_index; -+ -+/* The hash strength needed to hash a full page */ -+#define HASH_STRENGTH_FULL (PAGE_SIZE / sizeof(u32)) -+ -+/* The hash strength needed for loop-back hashing */ -+#define HASH_STRENGTH_MAX (HASH_STRENGTH_FULL + 10) -+ -+/* The random offsets in a page */ -+static u32 *random_nums; -+ -+/* The hash strength */ -+static unsigned long hash_strength = HASH_STRENGTH_FULL >> 4; -+ -+/* The delta value each time the hash strength increases or decreases */ -+static unsigned long hash_strength_delta; -+#define HASH_STRENGTH_DELTA_MAX 5 -+ -+/* The time we have saved due to random_sample_hash */ -+static u64 rshash_pos; -+ -+/* The time we have wasted due to hash collision */ -+static u64 rshash_neg; -+ -+struct uksm_benefit { -+ u64 pos; -+ u64 neg; -+ u64 scanned; -+ unsigned long base; -+} benefit; -+ -+/* -+ * The relative cost of memcmp, compared to 1 time unit of random sample -+ * hash, this value is tested when ksm module is initialized -+ */ -+static unsigned long memcmp_cost; -+ -+static unsigned long rshash_neg_cont_zero; -+static unsigned long rshash_cont_obscure; -+ -+/* The possible states of hash strength adjustment heuristic */ -+enum rshash_states { -+ RSHASH_STILL, -+ RSHASH_TRYUP, -+ RSHASH_TRYDOWN, -+ RSHASH_NEW, -+ RSHASH_PRE_STILL, -+}; -+ -+/* The possible direction we are about to adjust hash strength */ -+enum rshash_direct { -+ GO_UP, -+ GO_DOWN, -+ OBSCURE, -+ STILL, -+}; -+ -+/* random sampling hash state machine */ -+static struct { -+ enum rshash_states state; -+ enum rshash_direct pre_direct; -+ u8 below_count; -+ /* Keep a lookup window of size 5, iff above_count/below_count > 3 -+ * in this window we stop trying. -+ */ -+ u8 lookup_window_index; -+ u64 stable_benefit; -+ unsigned long turn_point_down; -+ unsigned long turn_benefit_down; -+ unsigned long turn_point_up; -+ unsigned long turn_benefit_up; -+ unsigned long stable_point; -+} rshash_state; -+ -+/*zero page hash table, hash_strength [0 ~ HASH_STRENGTH_MAX]*/ -+static u32 *zero_hash_table; -+ -+static inline struct node_vma *alloc_node_vma(void) -+{ -+ struct node_vma *node_vma; -+ -+ node_vma = kmem_cache_zalloc(node_vma_cache, GFP_KERNEL | -+ __GFP_NORETRY | __GFP_NOWARN); -+ if (node_vma) { -+ INIT_HLIST_HEAD(&node_vma->rmap_hlist); -+ INIT_HLIST_NODE(&node_vma->hlist); -+ } -+ return node_vma; -+} -+ -+static inline void free_node_vma(struct node_vma *node_vma) -+{ -+ kmem_cache_free(node_vma_cache, node_vma); -+} -+ -+ -+static inline struct vma_slot *alloc_vma_slot(void) -+{ -+ struct vma_slot *slot; -+ -+ /* -+ * In case ksm is not initialized by now. -+ * Oops, we need to consider the call site of uksm_init() in the future. 
-+ */ -+ if (!vma_slot_cache) -+ return NULL; -+ -+ slot = kmem_cache_zalloc(vma_slot_cache, GFP_KERNEL | -+ __GFP_NORETRY | __GFP_NOWARN); -+ if (slot) { -+ INIT_LIST_HEAD(&slot->slot_list); -+ INIT_LIST_HEAD(&slot->dedup_list); -+ slot->flags |= UKSM_SLOT_NEED_RERAND; -+ } -+ return slot; -+} -+ -+static inline void free_vma_slot(struct vma_slot *vma_slot) -+{ -+ kmem_cache_free(vma_slot_cache, vma_slot); -+} -+ -+ -+ -+static inline struct rmap_item *alloc_rmap_item(void) -+{ -+ struct rmap_item *rmap_item; -+ -+ rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL | -+ __GFP_NORETRY | __GFP_NOWARN); -+ if (rmap_item) { -+ /* bug on lowest bit is not clear for flag use */ -+ BUG_ON(is_addr(rmap_item)); -+ } -+ return rmap_item; -+} -+ -+static inline void free_rmap_item(struct rmap_item *rmap_item) -+{ -+ rmap_item->slot = NULL; /* debug safety */ -+ kmem_cache_free(rmap_item_cache, rmap_item); -+} -+ -+static inline struct stable_node *alloc_stable_node(void) -+{ -+ struct stable_node *node; -+ -+ node = kmem_cache_alloc(stable_node_cache, GFP_KERNEL | -+ __GFP_NORETRY | __GFP_NOWARN); -+ if (!node) -+ return NULL; -+ -+ INIT_HLIST_HEAD(&node->hlist); -+ list_add(&node->all_list, &stable_node_list); -+ return node; -+} -+ -+static inline void free_stable_node(struct stable_node *stable_node) -+{ -+ list_del(&stable_node->all_list); -+ kmem_cache_free(stable_node_cache, stable_node); -+} -+ -+static inline struct tree_node *alloc_tree_node(struct list_head *list) -+{ -+ struct tree_node *node; -+ -+ node = kmem_cache_zalloc(tree_node_cache, GFP_KERNEL | -+ __GFP_NORETRY | __GFP_NOWARN); -+ if (!node) -+ return NULL; -+ -+ list_add(&node->all_list, list); -+ return node; -+} -+ -+static inline void free_tree_node(struct tree_node *node) -+{ -+ list_del(&node->all_list); -+ kmem_cache_free(tree_node_cache, node); -+} -+ -+static void uksm_drop_anon_vma(struct rmap_item *rmap_item) -+{ -+ struct anon_vma *anon_vma = rmap_item->anon_vma; -+ -+ put_anon_vma(anon_vma); -+} -+ -+ -+/** -+ * Remove a stable node from stable_tree, may unlink from its tree_node and -+ * may remove its parent tree_node if no other stable node is pending. -+ * -+ * @stable_node The node need to be removed -+ * @unlink_rb Will this node be unlinked from the rbtree? -+ * @remove_tree_ node Will its tree_node be removed if empty? 
-+ */ -+static void remove_node_from_stable_tree(struct stable_node *stable_node, -+ int unlink_rb, int remove_tree_node) -+{ -+ struct node_vma *node_vma; -+ struct rmap_item *rmap_item; -+ struct hlist_node *n; -+ -+ if (!hlist_empty(&stable_node->hlist)) { -+ hlist_for_each_entry_safe(node_vma, n, -+ &stable_node->hlist, hlist) { -+ hlist_for_each_entry(rmap_item, &node_vma->rmap_hlist, hlist) { -+ uksm_pages_sharing--; -+ -+ uksm_drop_anon_vma(rmap_item); -+ rmap_item->address &= PAGE_MASK; -+ } -+ free_node_vma(node_vma); -+ cond_resched(); -+ } -+ -+ /* the last one is counted as shared */ -+ uksm_pages_shared--; -+ uksm_pages_sharing++; -+ } -+ -+ if (stable_node->tree_node && unlink_rb) { -+ rb_erase(&stable_node->node, -+ &stable_node->tree_node->sub_root); -+ -+ if (RB_EMPTY_ROOT(&stable_node->tree_node->sub_root) && -+ remove_tree_node) { -+ rb_erase(&stable_node->tree_node->node, -+ root_stable_treep); -+ free_tree_node(stable_node->tree_node); -+ } else { -+ stable_node->tree_node->count--; -+ } -+ } -+ -+ free_stable_node(stable_node); -+} -+ -+ -+/* -+ * get_uksm_page: checks if the page indicated by the stable node -+ * is still its ksm page, despite having held no reference to it. -+ * In which case we can trust the content of the page, and it -+ * returns the gotten page; but if the page has now been zapped, -+ * remove the stale node from the stable tree and return NULL. -+ * -+ * You would expect the stable_node to hold a reference to the ksm page. -+ * But if it increments the page's count, swapping out has to wait for -+ * ksmd to come around again before it can free the page, which may take -+ * seconds or even minutes: much too unresponsive. So instead we use a -+ * "keyhole reference": access to the ksm page from the stable node peeps -+ * out through its keyhole to see if that page still holds the right key, -+ * pointing back to this stable node. This relies on freeing a PageAnon -+ * page to reset its page->mapping to NULL, and relies on no other use of -+ * a page to put something that might look like our key in page->mapping. -+ * -+ * include/linux/pagemap.h page_cache_get_speculative() is a good reference, -+ * but this is different - made simpler by uksm_thread_mutex being held, but -+ * interesting for assuming that no other use of the struct page could ever -+ * put our expected_mapping into page->mapping (or a field of the union which -+ * coincides with page->mapping). The RCU calls are not for KSM at all, but -+ * to keep the page_count protocol described with page_cache_get_speculative. -+ * -+ * Note: it is possible that get_uksm_page() will return NULL one moment, -+ * then page the next, if the page is in between page_freeze_refs() and -+ * page_unfreeze_refs(): this shouldn't be a problem anywhere, the page -+ * is on its way to being freed; but it is an anomaly to bear in mind. -+ * -+ * @unlink_rb: if the removal of this node will firstly unlink from -+ * its rbtree. stable_node_reinsert will prevent this when restructuring the -+ * node from its old tree. -+ * -+ * @remove_tree_node: if this is the last one of its tree_node, will the -+ * tree_node be freed ? If we are inserting stable node, this tree_node may -+ * be reused, so don't free it. 
-+ */ -+static struct page *get_uksm_page(struct stable_node *stable_node, -+ int unlink_rb, int remove_tree_node) -+{ -+ struct page *page; -+ void *expected_mapping; -+ unsigned long kpfn; -+ -+ expected_mapping = (void *)((unsigned long)stable_node | -+ PAGE_MAPPING_KSM); -+again: -+ kpfn = READ_ONCE(stable_node->kpfn); -+ page = pfn_to_page(kpfn); -+ -+ /* -+ * page is computed from kpfn, so on most architectures reading -+ * page->mapping is naturally ordered after reading node->kpfn, -+ * but on Alpha we need to be more careful. -+ */ -+ smp_read_barrier_depends(); -+ -+ if (READ_ONCE(page->mapping) != expected_mapping) -+ goto stale; -+ -+ /* -+ * We cannot do anything with the page while its refcount is 0. -+ * Usually 0 means free, or tail of a higher-order page: in which -+ * case this node is no longer referenced, and should be freed; -+ * however, it might mean that the page is under page_freeze_refs(). -+ * The __remove_mapping() case is easy, again the node is now stale; -+ * but if page is swapcache in migrate_page_move_mapping(), it might -+ * still be our page, in which case it's essential to keep the node. -+ */ -+ while (!get_page_unless_zero(page)) { -+ /* -+ * Another check for page->mapping != expected_mapping would -+ * work here too. We have chosen the !PageSwapCache test to -+ * optimize the common case, when the page is or is about to -+ * be freed: PageSwapCache is cleared (under spin_lock_irq) -+ * in the freeze_refs section of __remove_mapping(); but Anon -+ * page->mapping reset to NULL later, in free_pages_prepare(). -+ */ -+ if (!PageSwapCache(page)) -+ goto stale; -+ cpu_relax(); -+ } -+ -+ if (READ_ONCE(page->mapping) != expected_mapping) { -+ put_page(page); -+ goto stale; -+ } -+ -+ lock_page(page); -+ if (READ_ONCE(page->mapping) != expected_mapping) { -+ unlock_page(page); -+ put_page(page); -+ goto stale; -+ } -+ unlock_page(page); -+ return page; -+stale: -+ /* -+ * We come here from above when page->mapping or !PageSwapCache -+ * suggests that the node is stale; but it might be under migration. -+ * We need smp_rmb(), matching the smp_wmb() in ksm_migrate_page(), -+ * before checking whether node->kpfn has been changed. -+ */ -+ smp_rmb(); -+ if (stable_node->kpfn != kpfn) -+ goto again; -+ -+ remove_node_from_stable_tree(stable_node, unlink_rb, remove_tree_node); -+ -+ return NULL; -+} -+ -+/* -+ * Removing rmap_item from stable or unstable tree. -+ * This function will clean the information from the stable/unstable tree. -+ */ -+static inline void remove_rmap_item_from_tree(struct rmap_item *rmap_item) -+{ -+ if (rmap_item->address & STABLE_FLAG) { -+ struct stable_node *stable_node; -+ struct node_vma *node_vma; -+ struct page *page; -+ -+ node_vma = rmap_item->head; -+ stable_node = node_vma->head; -+ page = get_uksm_page(stable_node, 1, 1); -+ if (!page) -+ goto out; -+ -+ /* -+ * page lock is needed because it's racing with -+ * try_to_unmap_ksm(), etc. -+ */ -+ lock_page(page); -+ hlist_del(&rmap_item->hlist); -+ -+ if (hlist_empty(&node_vma->rmap_hlist)) { -+ hlist_del(&node_vma->hlist); -+ free_node_vma(node_vma); -+ } -+ unlock_page(page); -+ -+ put_page(page); -+ if (hlist_empty(&stable_node->hlist)) { -+ /* do NOT call remove_node_from_stable_tree() here, -+ * it's possible for a forked rmap_item not in -+ * stable tree while the in-tree rmap_items were -+ * deleted. 
-+ */ -+ uksm_pages_shared--; -+ } else -+ uksm_pages_sharing--; -+ -+ -+ uksm_drop_anon_vma(rmap_item); -+ } else if (rmap_item->address & UNSTABLE_FLAG) { -+ if (rmap_item->hash_round == uksm_hash_round) { -+ -+ rb_erase(&rmap_item->node, -+ &rmap_item->tree_node->sub_root); -+ if (RB_EMPTY_ROOT(&rmap_item->tree_node->sub_root)) { -+ rb_erase(&rmap_item->tree_node->node, -+ &root_unstable_tree); -+ -+ free_tree_node(rmap_item->tree_node); -+ } else -+ rmap_item->tree_node->count--; -+ } -+ uksm_pages_unshared--; -+ } -+ -+ rmap_item->address &= PAGE_MASK; -+ rmap_item->hash_max = 0; -+ -+out: -+ cond_resched(); /* we're called from many long loops */ -+} -+ -+static inline int slot_in_uksm(struct vma_slot *slot) -+{ -+ return list_empty(&slot->slot_list); -+} -+ -+/* -+ * Test if the mm is exiting -+ */ -+static inline bool uksm_test_exit(struct mm_struct *mm) -+{ -+ return atomic_read(&mm->mm_users) == 0; -+} -+ -+static inline unsigned long vma_pool_size(struct vma_slot *slot) -+{ -+ return round_up(sizeof(struct rmap_list_entry) * slot->pages, -+ PAGE_SIZE) >> PAGE_SHIFT; -+} -+ -+#define CAN_OVERFLOW_U64(x, delta) (U64_MAX - (x) < (delta)) -+ -+/* must be done with sem locked */ -+static int slot_pool_alloc(struct vma_slot *slot) -+{ -+ unsigned long pool_size; -+ -+ if (slot->rmap_list_pool) -+ return 0; -+ -+ pool_size = vma_pool_size(slot); -+ slot->rmap_list_pool = kcalloc(pool_size, sizeof(struct page *), -+ GFP_KERNEL); -+ if (!slot->rmap_list_pool) -+ return -ENOMEM; -+ -+ slot->pool_counts = kcalloc(pool_size, sizeof(unsigned int), -+ GFP_KERNEL); -+ if (!slot->pool_counts) { -+ kfree(slot->rmap_list_pool); -+ return -ENOMEM; -+ } -+ -+ slot->pool_size = pool_size; -+ BUG_ON(CAN_OVERFLOW_U64(uksm_pages_total, slot->pages)); -+ slot->flags |= UKSM_SLOT_IN_UKSM; -+ uksm_pages_total += slot->pages; -+ -+ return 0; -+} -+ -+/* -+ * Called after vma is unlinked from its mm -+ */ -+void uksm_remove_vma(struct vm_area_struct *vma) -+{ -+ struct vma_slot *slot; -+ -+ if (!vma->uksm_vma_slot) -+ return; -+ -+ spin_lock(&vma_slot_list_lock); -+ slot = vma->uksm_vma_slot; -+ if (!slot) -+ goto out; -+ -+ if (slot_in_uksm(slot)) { -+ /** -+ * This slot has been added by ksmd, so move to the del list -+ * waiting ksmd to free it. -+ */ -+ list_add_tail(&slot->slot_list, &vma_slot_del); -+ } else { -+ /** -+ * It's still on new list. It's ok to free slot directly. -+ */ -+ list_del(&slot->slot_list); -+ free_vma_slot(slot); -+ } -+out: -+ vma->uksm_vma_slot = NULL; -+ spin_unlock(&vma_slot_list_lock); -+} -+ -+/** -+ * Need to do two things: -+ * 1. check if slot was moved to del list -+ * 2. make sure the mmap_sem is manipulated under valid vma. -+ * -+ * My concern here is that in some cases, this may make -+ * vma_slot_list_lock() waiters to serialized further by some -+ * sem->wait_lock, can this really be expensive? -+ * -+ * -+ * @return -+ * 0: if successfully locked mmap_sem -+ * -ENOENT: this slot was moved to del list -+ * -EBUSY: vma lock failed -+ */ -+static int try_down_read_slot_mmap_sem(struct vma_slot *slot) -+{ -+ struct vm_area_struct *vma; -+ struct mm_struct *mm; -+ struct rw_semaphore *sem; -+ -+ spin_lock(&vma_slot_list_lock); -+ -+ /* the slot_list was removed and inited from new list, when it enters -+ * uksm_list. 
If now it's not empty, then it must be moved to del list -+ */ -+ if (!slot_in_uksm(slot)) { -+ spin_unlock(&vma_slot_list_lock); -+ return -ENOENT; -+ } -+ -+ BUG_ON(slot->pages != vma_pages(slot->vma)); -+ /* Ok, vma still valid */ -+ vma = slot->vma; -+ mm = vma->vm_mm; -+ sem = &mm->mmap_sem; -+ -+ if (uksm_test_exit(mm)) { -+ spin_unlock(&vma_slot_list_lock); -+ return -ENOENT; -+ } -+ -+ if (down_read_trylock(sem)) { -+ spin_unlock(&vma_slot_list_lock); -+ if (slot_pool_alloc(slot)) { -+ uksm_remove_vma(vma); -+ up_read(sem); -+ return -ENOENT; -+ } -+ return 0; -+ } -+ -+ spin_unlock(&vma_slot_list_lock); -+ return -EBUSY; -+} -+ -+static inline unsigned long -+vma_page_address(struct page *page, struct vm_area_struct *vma) -+{ -+ pgoff_t pgoff = page->index; -+ unsigned long address; -+ -+ address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); -+ if (unlikely(address < vma->vm_start || address >= vma->vm_end)) { -+ /* page should be within @vma mapping range */ -+ return -EFAULT; -+ } -+ return address; -+} -+ -+ -+/* return 0 on success with the item's mmap_sem locked */ -+static inline int get_mergeable_page_lock_mmap(struct rmap_item *item) -+{ -+ struct mm_struct *mm; -+ struct vma_slot *slot = item->slot; -+ int err = -EINVAL; -+ -+ struct page *page; -+ -+ /* -+ * try_down_read_slot_mmap_sem() returns non-zero if the slot -+ * has been removed by uksm_remove_vma(). -+ */ -+ if (try_down_read_slot_mmap_sem(slot)) -+ return -EBUSY; -+ -+ mm = slot->vma->vm_mm; -+ -+ if (uksm_test_exit(mm)) -+ goto failout_up; -+ -+ page = item->page; -+ rcu_read_lock(); -+ if (!get_page_unless_zero(page)) { -+ rcu_read_unlock(); -+ goto failout_up; -+ } -+ -+ /* No need to consider huge page here. */ -+ if (item->slot->vma->anon_vma != page_anon_vma(page) || -+ vma_page_address(page, item->slot->vma) != get_rmap_addr(item)) { -+ /* -+ * TODO: -+ * should we release this item becase of its stale page -+ * mapping? -+ */ -+ put_page(page); -+ rcu_read_unlock(); -+ goto failout_up; -+ } -+ rcu_read_unlock(); -+ return 0; -+ -+failout_up: -+ up_read(&mm->mmap_sem); -+ return err; -+} -+ -+/* -+ * What kind of VMA is considered ? -+ */ -+static inline int vma_can_enter(struct vm_area_struct *vma) -+{ -+ return uksm_flags_can_scan(vma->vm_flags); -+} -+ -+/* -+ * Called whenever a fresh new vma is created A new vma_slot. -+ * is created and inserted into a global list Must be called. -+ * after vma is inserted to its mm. 
-+ */ -+void uksm_vma_add_new(struct vm_area_struct *vma) -+{ -+ struct vma_slot *slot; -+ -+ if (!vma_can_enter(vma)) { -+ vma->uksm_vma_slot = NULL; -+ return; -+ } -+ -+ slot = alloc_vma_slot(); -+ if (!slot) { -+ vma->uksm_vma_slot = NULL; -+ return; -+ } -+ -+ vma->uksm_vma_slot = slot; -+ vma->vm_flags |= VM_MERGEABLE; -+ slot->vma = vma; -+ slot->mm = vma->vm_mm; -+ slot->ctime_j = jiffies; -+ slot->pages = vma_pages(vma); -+ spin_lock(&vma_slot_list_lock); -+ list_add_tail(&slot->slot_list, &vma_slot_new); -+ spin_unlock(&vma_slot_list_lock); -+} -+ -+/* 32/3 < they < 32/2 */ -+#define shiftl 8 -+#define shiftr 12 -+ -+#define HASH_FROM_TO(from, to) \ -+for (index = from; index < to; index++) { \ -+ pos = random_nums[index]; \ -+ hash += key[pos]; \ -+ hash += (hash << shiftl); \ -+ hash ^= (hash >> shiftr); \ -+} -+ -+ -+#define HASH_FROM_DOWN_TO(from, to) \ -+for (index = from - 1; index >= to; index--) { \ -+ hash ^= (hash >> shiftr); \ -+ hash ^= (hash >> (shiftr*2)); \ -+ hash -= (hash << shiftl); \ -+ hash += (hash << (shiftl*2)); \ -+ pos = random_nums[index]; \ -+ hash -= key[pos]; \ -+} -+ -+/* -+ * The main random sample hash function. -+ */ -+static u32 random_sample_hash(void *addr, u32 hash_strength) -+{ -+ u32 hash = 0xdeadbeef; -+ int index, pos, loop = hash_strength; -+ u32 *key = (u32 *)addr; -+ -+ if (loop > HASH_STRENGTH_FULL) -+ loop = HASH_STRENGTH_FULL; -+ -+ HASH_FROM_TO(0, loop); -+ -+ if (hash_strength > HASH_STRENGTH_FULL) { -+ loop = hash_strength - HASH_STRENGTH_FULL; -+ HASH_FROM_TO(0, loop); -+ } -+ -+ return hash; -+} -+ -+ -+/** -+ * It's used when hash strength is adjusted -+ * -+ * @addr The page's virtual address -+ * @from The original hash strength -+ * @to The hash strength changed to -+ * @hash The hash value generated with "from" hash value -+ * -+ * return the hash value -+ */ -+static u32 delta_hash(void *addr, int from, int to, u32 hash) -+{ -+ u32 *key = (u32 *)addr; -+ int index, pos; /* make sure they are int type */ -+ -+ if (to > from) { -+ if (from >= HASH_STRENGTH_FULL) { -+ from -= HASH_STRENGTH_FULL; -+ to -= HASH_STRENGTH_FULL; -+ HASH_FROM_TO(from, to); -+ } else if (to <= HASH_STRENGTH_FULL) { -+ HASH_FROM_TO(from, to); -+ } else { -+ HASH_FROM_TO(from, HASH_STRENGTH_FULL); -+ HASH_FROM_TO(0, to - HASH_STRENGTH_FULL); -+ } -+ } else { -+ if (from <= HASH_STRENGTH_FULL) { -+ HASH_FROM_DOWN_TO(from, to); -+ } else if (to >= HASH_STRENGTH_FULL) { -+ from -= HASH_STRENGTH_FULL; -+ to -= HASH_STRENGTH_FULL; -+ HASH_FROM_DOWN_TO(from, to); -+ } else { -+ HASH_FROM_DOWN_TO(from - HASH_STRENGTH_FULL, 0); -+ HASH_FROM_DOWN_TO(HASH_STRENGTH_FULL, to); -+ } -+ } -+ -+ return hash; -+} -+ -+/** -+ * -+ * Called when: rshash_pos or rshash_neg is about to overflow or a scan round -+ * has finished. -+ * -+ * return 0 if no page has been scanned since last call, 1 otherwise. 
-+ */ -+static inline int encode_benefit(void) -+{ -+ u64 scanned_delta, pos_delta, neg_delta; -+ unsigned long base = benefit.base; -+ -+ scanned_delta = uksm_pages_scanned - uksm_pages_scanned_last; -+ -+ if (!scanned_delta) -+ return 0; -+ -+ scanned_delta >>= base; -+ pos_delta = rshash_pos >> base; -+ neg_delta = rshash_neg >> base; -+ -+ if (CAN_OVERFLOW_U64(benefit.pos, pos_delta) || -+ CAN_OVERFLOW_U64(benefit.neg, neg_delta) || -+ CAN_OVERFLOW_U64(benefit.scanned, scanned_delta)) { -+ benefit.scanned >>= 1; -+ benefit.neg >>= 1; -+ benefit.pos >>= 1; -+ benefit.base++; -+ scanned_delta >>= 1; -+ pos_delta >>= 1; -+ neg_delta >>= 1; -+ } -+ -+ benefit.pos += pos_delta; -+ benefit.neg += neg_delta; -+ benefit.scanned += scanned_delta; -+ -+ BUG_ON(!benefit.scanned); -+ -+ rshash_pos = rshash_neg = 0; -+ uksm_pages_scanned_last = uksm_pages_scanned; -+ -+ return 1; -+} -+ -+static inline void reset_benefit(void) -+{ -+ benefit.pos = 0; -+ benefit.neg = 0; -+ benefit.base = 0; -+ benefit.scanned = 0; -+} -+ -+static inline void inc_rshash_pos(unsigned long delta) -+{ -+ if (CAN_OVERFLOW_U64(rshash_pos, delta)) -+ encode_benefit(); -+ -+ rshash_pos += delta; -+} -+ -+static inline void inc_rshash_neg(unsigned long delta) -+{ -+ if (CAN_OVERFLOW_U64(rshash_neg, delta)) -+ encode_benefit(); -+ -+ rshash_neg += delta; -+} -+ -+ -+static inline u32 page_hash(struct page *page, unsigned long hash_strength, -+ int cost_accounting) -+{ -+ u32 val; -+ unsigned long delta; -+ -+ void *addr = kmap_atomic(page); -+ -+ val = random_sample_hash(addr, hash_strength); -+ kunmap_atomic(addr); -+ -+ if (cost_accounting) { -+ if (hash_strength < HASH_STRENGTH_FULL) -+ delta = HASH_STRENGTH_FULL - hash_strength; -+ else -+ delta = 0; -+ -+ inc_rshash_pos(delta); -+ } -+ -+ return val; -+} -+ -+static int memcmp_pages_with_cost(struct page *page1, struct page *page2, -+ int cost_accounting) -+{ -+ char *addr1, *addr2; -+ int ret; -+ -+ addr1 = kmap_atomic(page1); -+ addr2 = kmap_atomic(page2); -+ ret = memcmp(addr1, addr2, PAGE_SIZE); -+ kunmap_atomic(addr2); -+ kunmap_atomic(addr1); -+ -+ if (cost_accounting) -+ inc_rshash_neg(memcmp_cost); -+ -+ return ret; -+} -+ -+static inline int pages_identical_with_cost(struct page *page1, struct page *page2) -+{ -+ return !memcmp_pages_with_cost(page1, page2, 0); -+} -+ -+static inline int is_page_full_zero(struct page *page) -+{ -+ char *addr; -+ int ret; -+ -+ addr = kmap_atomic(page); -+ ret = is_full_zero(addr, PAGE_SIZE); -+ kunmap_atomic(addr); -+ -+ return ret; -+} -+ -+static int write_protect_page(struct vm_area_struct *vma, struct page *page, -+ pte_t *orig_pte, pte_t *old_pte) -+{ -+ struct mm_struct *mm = vma->vm_mm; -+ struct page_vma_mapped_walk pvmw = { -+ .page = page, -+ .vma = vma, -+ }; -+ struct mmu_notifier_range range; -+ int swapped; -+ int err = -EFAULT; -+ -+ pvmw.address = page_address_in_vma(page, vma); -+ if (pvmw.address == -EFAULT) -+ goto out; -+ -+ BUG_ON(PageTransCompound(page)); -+ -+ mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, pvmw.address, -+ pvmw.address + PAGE_SIZE); -+ mmu_notifier_invalidate_range_start(&range); -+ -+ if (!page_vma_mapped_walk(&pvmw)) -+ goto out_mn; -+ if (WARN_ONCE(!pvmw.pte, "Unexpected PMD mapping?")) -+ goto out_unlock; -+ -+ if (old_pte) -+ *old_pte = *pvmw.pte; -+ -+ if (pte_write(*pvmw.pte) || pte_dirty(*pvmw.pte) || -+ (pte_protnone(*pvmw.pte) && pte_savedwrite(*pvmw.pte)) || mm_tlb_flush_pending(mm)) { -+ pte_t entry; -+ -+ swapped = PageSwapCache(page); -+ flush_cache_page(vma, 
pvmw.address, page_to_pfn(page)); -+ /* -+ * Ok this is tricky, when get_user_pages_fast() run it doesn't -+ * take any lock, therefore the check that we are going to make -+ * with the pagecount against the mapcount is racey and -+ * O_DIRECT can happen right after the check. -+ * So we clear the pte and flush the tlb before the check -+ * this assure us that no O_DIRECT can happen after the check -+ * or in the middle of the check. -+ */ -+ entry = ptep_clear_flush_notify(vma, pvmw.address, pvmw.pte); -+ /* -+ * Check that no O_DIRECT or similar I/O is in progress on the -+ * page -+ */ -+ if (page_mapcount(page) + 1 + swapped != page_count(page)) { -+ set_pte_at(mm, pvmw.address, pvmw.pte, entry); -+ goto out_unlock; -+ } -+ if (pte_dirty(entry)) -+ set_page_dirty(page); -+ -+ if (pte_protnone(entry)) -+ entry = pte_mkclean(pte_clear_savedwrite(entry)); -+ else -+ entry = pte_mkclean(pte_wrprotect(entry)); -+ -+ set_pte_at_notify(mm, pvmw.address, pvmw.pte, entry); -+ } -+ *orig_pte = *pvmw.pte; -+ err = 0; -+ -+out_unlock: -+ page_vma_mapped_walk_done(&pvmw); -+out_mn: -+ mmu_notifier_invalidate_range_end(&range); -+out: -+ return err; -+} -+ -+#define MERGE_ERR_PGERR 1 /* the page is invalid cannot continue */ -+#define MERGE_ERR_COLLI 2 /* there is a collision */ -+#define MERGE_ERR_COLLI_MAX 3 /* collision at the max hash strength */ -+#define MERGE_ERR_CHANGED 4 /* the page has changed since last hash */ -+ -+ -+/** -+ * replace_page - replace page in vma by new ksm page -+ * @vma: vma that holds the pte pointing to page -+ * @page: the page we are replacing by kpage -+ * @kpage: the ksm page we replace page by -+ * @orig_pte: the original value of the pte -+ * -+ * Returns 0 on success, MERGE_ERR_PGERR on failure. -+ */ -+static int replace_page(struct vm_area_struct *vma, struct page *page, -+ struct page *kpage, pte_t orig_pte) -+{ -+ struct mm_struct *mm = vma->vm_mm; -+ struct mmu_notifier_range range; -+ pgd_t *pgd; -+ p4d_t *p4d; -+ pud_t *pud; -+ pmd_t *pmd; -+ pte_t *ptep; -+ spinlock_t *ptl; -+ pte_t entry; -+ -+ unsigned long addr; -+ int err = MERGE_ERR_PGERR; -+ -+ addr = page_address_in_vma(page, vma); -+ if (addr == -EFAULT) -+ goto out; -+ -+ pgd = pgd_offset(mm, addr); -+ if (!pgd_present(*pgd)) -+ goto out; -+ -+ p4d = p4d_offset(pgd, addr); -+ pud = pud_offset(p4d, addr); -+ if (!pud_present(*pud)) -+ goto out; -+ -+ pmd = pmd_offset(pud, addr); -+ BUG_ON(pmd_trans_huge(*pmd)); -+ if (!pmd_present(*pmd)) -+ goto out; -+ -+ mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, addr, -+ addr + PAGE_SIZE); -+ mmu_notifier_invalidate_range_start(&range); -+ -+ ptep = pte_offset_map_lock(mm, pmd, addr, &ptl); -+ if (!pte_same(*ptep, orig_pte)) { -+ pte_unmap_unlock(ptep, ptl); -+ goto out_mn; -+ } -+ -+ flush_cache_page(vma, addr, pte_pfn(*ptep)); -+ ptep_clear_flush_notify(vma, addr, ptep); -+ entry = mk_pte(kpage, vma->vm_page_prot); -+ -+ /* special treatment is needed for zero_page */ -+ if ((page_to_pfn(kpage) == uksm_zero_pfn) || -+ (page_to_pfn(kpage) == zero_pfn)) { -+ entry = pte_mkspecial(entry); -+ dec_mm_counter(mm, MM_ANONPAGES); -+ inc_zone_page_state(page, NR_UKSM_ZERO_PAGES); -+ } else { -+ get_page(kpage); -+ page_add_anon_rmap(kpage, vma, addr, false); -+ } -+ -+ set_pte_at_notify(mm, addr, ptep, entry); -+ -+ page_remove_rmap(page, false); -+ if (!page_mapped(page)) -+ try_to_free_swap(page); -+ put_page(page); -+ -+ pte_unmap_unlock(ptep, ptl); -+ err = 0; -+out_mn: -+ mmu_notifier_invalidate_range_end(&range); -+out: -+ return err; -+} -+ 
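/*
 * Illustrative sketch, not part of the patch: the reference accounting
 * behind the O_DIRECT check in write_protect_page() above.  The helper
 * name page_refs_are_expected() is assumed for illustration only; it
 * relies on page_mapcount()/page_count() from <linux/mm.h>.
 */
static inline bool page_refs_are_expected(struct page *page, int swapped)
{
	/*
	 * Expected references: one per PTE mapping, one held by the caller
	 * that looked the page up during the scan, plus one if the page
	 * also sits in the swap cache.  Any extra reference (for example a
	 * get_user_pages() pin taken for O_DIRECT) makes the two sides
	 * disagree, so the merge must back out.
	 */
	return page_mapcount(page) + 1 + swapped == page_count(page);
}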
-+ -+/** -+ * Fully hash a page with HASH_STRENGTH_MAX return a non-zero hash value. The -+ * zero hash value at HASH_STRENGTH_MAX is used to indicated that its -+ * hash_max member has not been calculated. -+ * -+ * @page The page needs to be hashed -+ * @hash_old The hash value calculated with current hash strength -+ * -+ * return the new hash value calculated at HASH_STRENGTH_MAX -+ */ -+static inline u32 page_hash_max(struct page *page, u32 hash_old) -+{ -+ u32 hash_max = 0; -+ void *addr; -+ -+ addr = kmap_atomic(page); -+ hash_max = delta_hash(addr, hash_strength, -+ HASH_STRENGTH_MAX, hash_old); -+ -+ kunmap_atomic(addr); -+ -+ if (!hash_max) -+ hash_max = 1; -+ -+ inc_rshash_neg(HASH_STRENGTH_MAX - hash_strength); -+ return hash_max; -+} -+ -+/* -+ * We compare the hash again, to ensure that it is really a hash collision -+ * instead of being caused by page write. -+ */ -+static inline int check_collision(struct rmap_item *rmap_item, -+ u32 hash) -+{ -+ int err; -+ struct page *page = rmap_item->page; -+ -+ /* if this rmap_item has already been hash_maxed, then the collision -+ * must appears in the second-level rbtree search. In this case we check -+ * if its hash_max value has been changed. Otherwise, the collision -+ * happens in the first-level rbtree search, so we check against it's -+ * current hash value. -+ */ -+ if (rmap_item->hash_max) { -+ inc_rshash_neg(memcmp_cost); -+ inc_rshash_neg(HASH_STRENGTH_MAX - hash_strength); -+ -+ if (rmap_item->hash_max == page_hash_max(page, hash)) -+ err = MERGE_ERR_COLLI; -+ else -+ err = MERGE_ERR_CHANGED; -+ } else { -+ inc_rshash_neg(memcmp_cost + hash_strength); -+ -+ if (page_hash(page, hash_strength, 0) == hash) -+ err = MERGE_ERR_COLLI; -+ else -+ err = MERGE_ERR_CHANGED; -+ } -+ -+ return err; -+} -+ -+/** -+ * Try to merge a rmap_item.page with a kpage in stable node. kpage must -+ * already be a ksm page. -+ * -+ * @return 0 if the pages were merged, -EFAULT otherwise. -+ */ -+static int try_to_merge_with_uksm_page(struct rmap_item *rmap_item, -+ struct page *kpage, u32 hash) -+{ -+ struct vm_area_struct *vma = rmap_item->slot->vma; -+ struct mm_struct *mm = vma->vm_mm; -+ pte_t orig_pte = __pte(0); -+ int err = MERGE_ERR_PGERR; -+ struct page *page; -+ -+ if (uksm_test_exit(mm)) -+ goto out; -+ -+ page = rmap_item->page; -+ -+ if (page == kpage) { /* ksm page forked */ -+ err = 0; -+ goto out; -+ } -+ -+ /* -+ * We need the page lock to read a stable PageSwapCache in -+ * write_protect_page(). We use trylock_page() instead of -+ * lock_page() because we don't want to wait here - we -+ * prefer to continue scanning and merging different pages, -+ * then come back to this page when it is unlocked. -+ */ -+ if (!trylock_page(page)) -+ goto out; -+ -+ if (!PageAnon(page) || !PageKsm(kpage)) -+ goto out_unlock; -+ -+ if (PageTransCompound(page)) { -+ err = split_huge_page(page); -+ if (err) -+ goto out_unlock; -+ } -+ -+ /* -+ * If this anonymous page is mapped only here, its pte may need -+ * to be write-protected. If it's mapped elsewhere, all of its -+ * ptes are necessarily already write-protected. But in either -+ * case, we need to lock and check page_count is not raised. 
-+ */ -+ if (write_protect_page(vma, page, &orig_pte, NULL) == 0) { -+ if (pages_identical_with_cost(page, kpage)) -+ err = replace_page(vma, page, kpage, orig_pte); -+ else -+ err = check_collision(rmap_item, hash); -+ } -+ -+ if ((vma->vm_flags & VM_LOCKED) && kpage && !err) { -+ munlock_vma_page(page); -+ if (!PageMlocked(kpage)) { -+ unlock_page(page); -+ lock_page(kpage); -+ mlock_vma_page(kpage); -+ page = kpage; /* for final unlock */ -+ } -+ } -+ -+out_unlock: -+ unlock_page(page); -+out: -+ return err; -+} -+ -+ -+ -+/** -+ * If two pages fail to merge in try_to_merge_two_pages, then we have a chance -+ * to restore a page mapping that has been changed in try_to_merge_two_pages. -+ * -+ * @return 0 on success. -+ */ -+static int restore_uksm_page_pte(struct vm_area_struct *vma, unsigned long addr, -+ pte_t orig_pte, pte_t wprt_pte) -+{ -+ struct mm_struct *mm = vma->vm_mm; -+ pgd_t *pgd; -+ p4d_t *p4d; -+ pud_t *pud; -+ pmd_t *pmd; -+ pte_t *ptep; -+ spinlock_t *ptl; -+ -+ int err = -EFAULT; -+ -+ pgd = pgd_offset(mm, addr); -+ if (!pgd_present(*pgd)) -+ goto out; -+ -+ p4d = p4d_offset(pgd, addr); -+ pud = pud_offset(p4d, addr); -+ if (!pud_present(*pud)) -+ goto out; -+ -+ pmd = pmd_offset(pud, addr); -+ if (!pmd_present(*pmd)) -+ goto out; -+ -+ ptep = pte_offset_map_lock(mm, pmd, addr, &ptl); -+ if (!pte_same(*ptep, wprt_pte)) { -+ /* already copied, let it be */ -+ pte_unmap_unlock(ptep, ptl); -+ goto out; -+ } -+ -+ /* -+ * Good boy, still here. When we still get the ksm page, it does not -+ * return to the free page pool, there is no way that a pte was changed -+ * to other page and gets back to this page. And remind that ksm page -+ * do not reuse in do_wp_page(). So it's safe to restore the original -+ * pte. -+ */ -+ flush_cache_page(vma, addr, pte_pfn(*ptep)); -+ ptep_clear_flush_notify(vma, addr, ptep); -+ set_pte_at_notify(mm, addr, ptep, orig_pte); -+ -+ pte_unmap_unlock(ptep, ptl); -+ err = 0; -+out: -+ return err; -+} -+ -+/** -+ * try_to_merge_two_pages() - take two identical pages and prepare -+ * them to be merged into one page(rmap_item->page) -+ * -+ * @return 0 if we successfully merged two identical pages into -+ * one ksm page. MERGE_ERR_COLLI if it's only a hash collision -+ * search in rbtree. MERGE_ERR_CHANGED if rmap_item has been -+ * changed since it's hashed. MERGE_ERR_PGERR otherwise. -+ * -+ */ -+static int try_to_merge_two_pages(struct rmap_item *rmap_item, -+ struct rmap_item *tree_rmap_item, -+ u32 hash) -+{ -+ pte_t orig_pte1 = __pte(0), orig_pte2 = __pte(0); -+ pte_t wprt_pte1 = __pte(0), wprt_pte2 = __pte(0); -+ struct vm_area_struct *vma1 = rmap_item->slot->vma; -+ struct vm_area_struct *vma2 = tree_rmap_item->slot->vma; -+ struct page *page = rmap_item->page; -+ struct page *tree_page = tree_rmap_item->page; -+ int err = MERGE_ERR_PGERR; -+ struct address_space *saved_mapping; -+ -+ -+ if (rmap_item->page == tree_rmap_item->page) -+ goto out; -+ -+ if (!trylock_page(page)) -+ goto out; -+ -+ if (!PageAnon(page)) -+ goto out_unlock; -+ -+ if (PageTransCompound(page)) { -+ err = split_huge_page(page); -+ if (err) -+ goto out_unlock; -+ } -+ -+ if (write_protect_page(vma1, page, &wprt_pte1, &orig_pte1) != 0) { -+ unlock_page(page); -+ goto out; -+ } -+ -+ /* -+ * While we hold page lock, upgrade page from -+ * PageAnon+anon_vma to PageKsm+NULL stable_node: -+ * stable_tree_insert() will update stable_node. 
-+ */ -+ saved_mapping = page->mapping; -+ set_page_stable_node(page, NULL); -+ mark_page_accessed(page); -+ if (!PageDirty(page)) -+ SetPageDirty(page); -+ -+ unlock_page(page); -+ -+ if (!trylock_page(tree_page)) -+ goto restore_out; -+ -+ if (!PageAnon(tree_page)) { -+ unlock_page(tree_page); -+ goto restore_out; -+ } -+ -+ if (PageTransCompound(tree_page)) { -+ err = split_huge_page(tree_page); -+ if (err) { -+ unlock_page(tree_page); -+ goto restore_out; -+ } -+ } -+ -+ if (write_protect_page(vma2, tree_page, &wprt_pte2, &orig_pte2) != 0) { -+ unlock_page(tree_page); -+ goto restore_out; -+ } -+ -+ if (pages_identical_with_cost(page, tree_page)) { -+ err = replace_page(vma2, tree_page, page, wprt_pte2); -+ if (err) { -+ unlock_page(tree_page); -+ goto restore_out; -+ } -+ -+ if ((vma2->vm_flags & VM_LOCKED)) { -+ munlock_vma_page(tree_page); -+ if (!PageMlocked(page)) { -+ unlock_page(tree_page); -+ lock_page(page); -+ mlock_vma_page(page); -+ tree_page = page; /* for final unlock */ -+ } -+ } -+ -+ unlock_page(tree_page); -+ -+ goto out; /* success */ -+ -+ } else { -+ if (tree_rmap_item->hash_max && -+ tree_rmap_item->hash_max == rmap_item->hash_max) { -+ err = MERGE_ERR_COLLI_MAX; -+ } else if (page_hash(page, hash_strength, 0) == -+ page_hash(tree_page, hash_strength, 0)) { -+ inc_rshash_neg(memcmp_cost + hash_strength * 2); -+ err = MERGE_ERR_COLLI; -+ } else { -+ err = MERGE_ERR_CHANGED; -+ } -+ -+ unlock_page(tree_page); -+ } -+ -+restore_out: -+ lock_page(page); -+ if (!restore_uksm_page_pte(vma1, get_rmap_addr(rmap_item), -+ orig_pte1, wprt_pte1)) -+ page->mapping = saved_mapping; -+ -+out_unlock: -+ unlock_page(page); -+out: -+ return err; -+} -+ -+static inline int hash_cmp(u32 new_val, u32 node_val) -+{ -+ if (new_val > node_val) -+ return 1; -+ else if (new_val < node_val) -+ return -1; -+ else -+ return 0; -+} -+ -+static inline u32 rmap_item_hash_max(struct rmap_item *item, u32 hash) -+{ -+ u32 hash_max = item->hash_max; -+ -+ if (!hash_max) { -+ hash_max = page_hash_max(item->page, hash); -+ -+ item->hash_max = hash_max; -+ } -+ -+ return hash_max; -+} -+ -+ -+ -+/** -+ * stable_tree_search() - search the stable tree for a page -+ * -+ * @item: the rmap_item we are comparing with -+ * @hash: the hash value of this item->page already calculated -+ * -+ * @return the page we have found, NULL otherwise. The page returned has -+ * been gotten. -+ */ -+static struct page *stable_tree_search(struct rmap_item *item, u32 hash) -+{ -+ struct rb_node *node = root_stable_treep->rb_node; -+ struct tree_node *tree_node; -+ unsigned long hash_max; -+ struct page *page = item->page; -+ struct stable_node *stable_node; -+ -+ stable_node = page_stable_node(page); -+ if (stable_node) { -+ /* ksm page forked, that is -+ * if (PageKsm(page) && !in_stable_tree(rmap_item)) -+ * it's actually gotten once outside. -+ */ -+ get_page(page); -+ return page; -+ } -+ -+ while (node) { -+ int cmp; -+ -+ tree_node = rb_entry(node, struct tree_node, node); -+ -+ cmp = hash_cmp(hash, tree_node->hash); -+ -+ if (cmp < 0) -+ node = node->rb_left; -+ else if (cmp > 0) -+ node = node->rb_right; -+ else -+ break; -+ } -+ -+ if (!node) -+ return NULL; -+ -+ if (tree_node->count == 1) { -+ stable_node = rb_entry(tree_node->sub_root.rb_node, -+ struct stable_node, node); -+ BUG_ON(!stable_node); -+ -+ goto get_page_out; -+ } -+ -+ /* -+ * ok, we have to search the second -+ * level subtree, hash the page to a -+ * full strength. 
-+ */ -+ node = tree_node->sub_root.rb_node; -+ BUG_ON(!node); -+ hash_max = rmap_item_hash_max(item, hash); -+ -+ while (node) { -+ int cmp; -+ -+ stable_node = rb_entry(node, struct stable_node, node); -+ -+ cmp = hash_cmp(hash_max, stable_node->hash_max); -+ -+ if (cmp < 0) -+ node = node->rb_left; -+ else if (cmp > 0) -+ node = node->rb_right; -+ else -+ goto get_page_out; -+ } -+ -+ return NULL; -+ -+get_page_out: -+ page = get_uksm_page(stable_node, 1, 1); -+ return page; -+} -+ -+static int try_merge_rmap_item(struct rmap_item *item, -+ struct page *kpage, -+ struct page *tree_page) -+{ -+ struct vm_area_struct *vma = item->slot->vma; -+ struct page_vma_mapped_walk pvmw = { -+ .page = kpage, -+ .vma = vma, -+ }; -+ -+ pvmw.address = get_rmap_addr(item); -+ if (!page_vma_mapped_walk(&pvmw)) -+ return 0; -+ -+ if (pte_write(*pvmw.pte)) { -+ /* has changed, abort! */ -+ page_vma_mapped_walk_done(&pvmw); -+ return 0; -+ } -+ -+ get_page(tree_page); -+ page_add_anon_rmap(tree_page, vma, pvmw.address, false); -+ -+ flush_cache_page(vma, pvmw.address, page_to_pfn(kpage)); -+ ptep_clear_flush_notify(vma, pvmw.address, pvmw.pte); -+ set_pte_at_notify(vma->vm_mm, pvmw.address, pvmw.pte, -+ mk_pte(tree_page, vma->vm_page_prot)); -+ -+ page_remove_rmap(kpage, false); -+ put_page(kpage); -+ -+ page_vma_mapped_walk_done(&pvmw); -+ -+ return 1; -+} -+ -+/** -+ * try_to_merge_with_stable_page() - when two rmap_items need to be inserted -+ * into stable tree, the page was found to be identical to a stable ksm page, -+ * this is the last chance we can merge them into one. -+ * -+ * @item1: the rmap_item holding the page which we wanted to insert -+ * into stable tree. -+ * @item2: the other rmap_item we found when unstable tree search -+ * @oldpage: the page currently mapped by the two rmap_items -+ * @tree_page: the page we found identical in stable tree node -+ * @success1: return if item1 is successfully merged -+ * @success2: return if item2 is successfully merged -+ */ -+static void try_merge_with_stable(struct rmap_item *item1, -+ struct rmap_item *item2, -+ struct page **kpage, -+ struct page *tree_page, -+ int *success1, int *success2) -+{ -+ struct vm_area_struct *vma1 = item1->slot->vma; -+ struct vm_area_struct *vma2 = item2->slot->vma; -+ *success1 = 0; -+ *success2 = 0; -+ -+ if (unlikely(*kpage == tree_page)) { -+ /* I don't think this can really happen */ -+ pr_warn("UKSM: unexpected condition detected in " -+ "%s -- *kpage == tree_page !\n", __func__); -+ *success1 = 1; -+ *success2 = 1; -+ return; -+ } -+ -+ if (!PageAnon(*kpage) || !PageKsm(*kpage)) -+ goto failed; -+ -+ if (!trylock_page(tree_page)) -+ goto failed; -+ -+ /* If the oldpage is still ksm and still pointed -+ * to in the right place, and still write protected, -+ * we are confident it's not changed, no need to -+ * memcmp anymore. -+ * be ware, we cannot take nested pte locks, -+ * deadlock risk. -+ */ -+ if (!try_merge_rmap_item(item1, *kpage, tree_page)) -+ goto unlock_failed; -+ -+ /* ok, then vma2, remind that pte1 already set */ -+ if (!try_merge_rmap_item(item2, *kpage, tree_page)) -+ goto success_1; -+ -+ *success2 = 1; -+success_1: -+ *success1 = 1; -+ -+ -+ if ((*success1 && vma1->vm_flags & VM_LOCKED) || -+ (*success2 && vma2->vm_flags & VM_LOCKED)) { -+ munlock_vma_page(*kpage); -+ if (!PageMlocked(tree_page)) -+ mlock_vma_page(tree_page); -+ } -+ -+ /* -+ * We do not need oldpage any more in the caller, so can break the lock -+ * now. 
-+ */ -+ unlock_page(*kpage); -+ *kpage = tree_page; /* Get unlocked outside. */ -+ return; -+ -+unlock_failed: -+ unlock_page(tree_page); -+failed: -+ return; -+} -+ -+static inline void stable_node_hash_max(struct stable_node *node, -+ struct page *page, u32 hash) -+{ -+ u32 hash_max = node->hash_max; -+ -+ if (!hash_max) { -+ hash_max = page_hash_max(page, hash); -+ node->hash_max = hash_max; -+ } -+} -+ -+static inline -+struct stable_node *new_stable_node(struct tree_node *tree_node, -+ struct page *kpage, u32 hash_max) -+{ -+ struct stable_node *new_stable_node; -+ -+ new_stable_node = alloc_stable_node(); -+ if (!new_stable_node) -+ return NULL; -+ -+ new_stable_node->kpfn = page_to_pfn(kpage); -+ new_stable_node->hash_max = hash_max; -+ new_stable_node->tree_node = tree_node; -+ set_page_stable_node(kpage, new_stable_node); -+ -+ return new_stable_node; -+} -+ -+static inline -+struct stable_node *first_level_insert(struct tree_node *tree_node, -+ struct rmap_item *rmap_item, -+ struct rmap_item *tree_rmap_item, -+ struct page **kpage, u32 hash, -+ int *success1, int *success2) -+{ -+ int cmp; -+ struct page *tree_page; -+ u32 hash_max = 0; -+ struct stable_node *stable_node, *new_snode; -+ struct rb_node *parent = NULL, **new; -+ -+ /* this tree node contains no sub-tree yet */ -+ stable_node = rb_entry(tree_node->sub_root.rb_node, -+ struct stable_node, node); -+ -+ tree_page = get_uksm_page(stable_node, 1, 0); -+ if (tree_page) { -+ cmp = memcmp_pages_with_cost(*kpage, tree_page, 1); -+ if (!cmp) { -+ try_merge_with_stable(rmap_item, tree_rmap_item, kpage, -+ tree_page, success1, success2); -+ put_page(tree_page); -+ if (!*success1 && !*success2) -+ goto failed; -+ -+ return stable_node; -+ -+ } else { -+ /* -+ * collision in first level try to create a subtree. -+ * A new node need to be created. -+ */ -+ put_page(tree_page); -+ -+ stable_node_hash_max(stable_node, tree_page, -+ tree_node->hash); -+ hash_max = rmap_item_hash_max(rmap_item, hash); -+ cmp = hash_cmp(hash_max, stable_node->hash_max); -+ -+ parent = &stable_node->node; -+ if (cmp < 0) -+ new = &parent->rb_left; -+ else if (cmp > 0) -+ new = &parent->rb_right; -+ else -+ goto failed; -+ } -+ -+ } else { -+ /* the only stable_node deleted, we reuse its tree_node. 
-+ */ -+ parent = NULL; -+ new = &tree_node->sub_root.rb_node; -+ } -+ -+ new_snode = new_stable_node(tree_node, *kpage, hash_max); -+ if (!new_snode) -+ goto failed; -+ -+ rb_link_node(&new_snode->node, parent, new); -+ rb_insert_color(&new_snode->node, &tree_node->sub_root); -+ tree_node->count++; -+ *success1 = *success2 = 1; -+ -+ return new_snode; -+ -+failed: -+ return NULL; -+} -+ -+static inline -+struct stable_node *stable_subtree_insert(struct tree_node *tree_node, -+ struct rmap_item *rmap_item, -+ struct rmap_item *tree_rmap_item, -+ struct page **kpage, u32 hash, -+ int *success1, int *success2) -+{ -+ struct page *tree_page; -+ u32 hash_max; -+ struct stable_node *stable_node, *new_snode; -+ struct rb_node *parent, **new; -+ -+research: -+ parent = NULL; -+ new = &tree_node->sub_root.rb_node; -+ BUG_ON(!*new); -+ hash_max = rmap_item_hash_max(rmap_item, hash); -+ while (*new) { -+ int cmp; -+ -+ stable_node = rb_entry(*new, struct stable_node, node); -+ -+ cmp = hash_cmp(hash_max, stable_node->hash_max); -+ -+ if (cmp < 0) { -+ parent = *new; -+ new = &parent->rb_left; -+ } else if (cmp > 0) { -+ parent = *new; -+ new = &parent->rb_right; -+ } else { -+ tree_page = get_uksm_page(stable_node, 1, 0); -+ if (tree_page) { -+ cmp = memcmp_pages_with_cost(*kpage, tree_page, 1); -+ if (!cmp) { -+ try_merge_with_stable(rmap_item, -+ tree_rmap_item, kpage, -+ tree_page, success1, success2); -+ -+ put_page(tree_page); -+ if (!*success1 && !*success2) -+ goto failed; -+ /* -+ * successfully merged with a stable -+ * node -+ */ -+ return stable_node; -+ } else { -+ put_page(tree_page); -+ goto failed; -+ } -+ } else { -+ /* -+ * stable node may be deleted, -+ * and subtree maybe -+ * restructed, cannot -+ * continue, research it. -+ */ -+ if (tree_node->count) { -+ goto research; -+ } else { -+ /* reuse the tree node*/ -+ parent = NULL; -+ new = &tree_node->sub_root.rb_node; -+ } -+ } -+ } -+ } -+ -+ new_snode = new_stable_node(tree_node, *kpage, hash_max); -+ if (!new_snode) -+ goto failed; -+ -+ rb_link_node(&new_snode->node, parent, new); -+ rb_insert_color(&new_snode->node, &tree_node->sub_root); -+ tree_node->count++; -+ *success1 = *success2 = 1; -+ -+ return new_snode; -+ -+failed: -+ return NULL; -+} -+ -+ -+/** -+ * stable_tree_insert() - try to insert a merged page in unstable tree to -+ * the stable tree -+ * -+ * @kpage: the page need to be inserted -+ * @hash: the current hash of this page -+ * @rmap_item: the rmap_item being scanned -+ * @tree_rmap_item: the rmap_item found on unstable tree -+ * @success1: return if rmap_item is merged -+ * @success2: return if tree_rmap_item is merged -+ * -+ * @return the stable_node on stable tree if at least one -+ * rmap_item is inserted into stable tree, NULL -+ * otherwise. 
-+ */ -+static struct stable_node * -+stable_tree_insert(struct page **kpage, u32 hash, -+ struct rmap_item *rmap_item, -+ struct rmap_item *tree_rmap_item, -+ int *success1, int *success2) -+{ -+ struct rb_node **new = &root_stable_treep->rb_node; -+ struct rb_node *parent = NULL; -+ struct stable_node *stable_node; -+ struct tree_node *tree_node; -+ u32 hash_max = 0; -+ -+ *success1 = *success2 = 0; -+ -+ while (*new) { -+ int cmp; -+ -+ tree_node = rb_entry(*new, struct tree_node, node); -+ -+ cmp = hash_cmp(hash, tree_node->hash); -+ -+ if (cmp < 0) { -+ parent = *new; -+ new = &parent->rb_left; -+ } else if (cmp > 0) { -+ parent = *new; -+ new = &parent->rb_right; -+ } else -+ break; -+ } -+ -+ if (*new) { -+ if (tree_node->count == 1) { -+ stable_node = first_level_insert(tree_node, rmap_item, -+ tree_rmap_item, kpage, -+ hash, success1, success2); -+ } else { -+ stable_node = stable_subtree_insert(tree_node, -+ rmap_item, tree_rmap_item, kpage, -+ hash, success1, success2); -+ } -+ } else { -+ -+ /* no tree node found */ -+ tree_node = alloc_tree_node(stable_tree_node_listp); -+ if (!tree_node) { -+ stable_node = NULL; -+ goto out; -+ } -+ -+ stable_node = new_stable_node(tree_node, *kpage, hash_max); -+ if (!stable_node) { -+ free_tree_node(tree_node); -+ goto out; -+ } -+ -+ tree_node->hash = hash; -+ rb_link_node(&tree_node->node, parent, new); -+ rb_insert_color(&tree_node->node, root_stable_treep); -+ parent = NULL; -+ new = &tree_node->sub_root.rb_node; -+ -+ rb_link_node(&stable_node->node, parent, new); -+ rb_insert_color(&stable_node->node, &tree_node->sub_root); -+ tree_node->count++; -+ *success1 = *success2 = 1; -+ } -+ -+out: -+ return stable_node; -+} -+ -+ -+/** -+ * get_tree_rmap_item_page() - try to get the page and lock the mmap_sem -+ * -+ * @return 0 on success, -EBUSY if unable to lock the mmap_sem, -+ * -EINVAL if the page mapping has been changed. -+ */ -+static inline int get_tree_rmap_item_page(struct rmap_item *tree_rmap_item) -+{ -+ int err; -+ -+ err = get_mergeable_page_lock_mmap(tree_rmap_item); -+ -+ if (err == -EINVAL) { -+ /* its page map has been changed, remove it */ -+ remove_rmap_item_from_tree(tree_rmap_item); -+ } -+ -+ /* The page is gotten and mmap_sem is locked now. */ -+ return err; -+} -+ -+ -+/** -+ * unstable_tree_search_insert() - search an unstable tree rmap_item with the -+ * same hash value. 
Get its page and trylock the mmap_sem -+ */ -+static inline -+struct rmap_item *unstable_tree_search_insert(struct rmap_item *rmap_item, -+ u32 hash) -+ -+{ -+ struct rb_node **new = &root_unstable_tree.rb_node; -+ struct rb_node *parent = NULL; -+ struct tree_node *tree_node; -+ u32 hash_max; -+ struct rmap_item *tree_rmap_item; -+ -+ while (*new) { -+ int cmp; -+ -+ tree_node = rb_entry(*new, struct tree_node, node); -+ -+ cmp = hash_cmp(hash, tree_node->hash); -+ -+ if (cmp < 0) { -+ parent = *new; -+ new = &parent->rb_left; -+ } else if (cmp > 0) { -+ parent = *new; -+ new = &parent->rb_right; -+ } else -+ break; -+ } -+ -+ if (*new) { -+ /* got the tree_node */ -+ if (tree_node->count == 1) { -+ tree_rmap_item = rb_entry(tree_node->sub_root.rb_node, -+ struct rmap_item, node); -+ BUG_ON(!tree_rmap_item); -+ -+ goto get_page_out; -+ } -+ -+ /* well, search the collision subtree */ -+ new = &tree_node->sub_root.rb_node; -+ BUG_ON(!*new); -+ hash_max = rmap_item_hash_max(rmap_item, hash); -+ -+ while (*new) { -+ int cmp; -+ -+ tree_rmap_item = rb_entry(*new, struct rmap_item, -+ node); -+ -+ cmp = hash_cmp(hash_max, tree_rmap_item->hash_max); -+ parent = *new; -+ if (cmp < 0) -+ new = &parent->rb_left; -+ else if (cmp > 0) -+ new = &parent->rb_right; -+ else -+ goto get_page_out; -+ } -+ } else { -+ /* alloc a new tree_node */ -+ tree_node = alloc_tree_node(&unstable_tree_node_list); -+ if (!tree_node) -+ return NULL; -+ -+ tree_node->hash = hash; -+ rb_link_node(&tree_node->node, parent, new); -+ rb_insert_color(&tree_node->node, &root_unstable_tree); -+ parent = NULL; -+ new = &tree_node->sub_root.rb_node; -+ } -+ -+ /* did not found even in sub-tree */ -+ rmap_item->tree_node = tree_node; -+ rmap_item->address |= UNSTABLE_FLAG; -+ rmap_item->hash_round = uksm_hash_round; -+ rb_link_node(&rmap_item->node, parent, new); -+ rb_insert_color(&rmap_item->node, &tree_node->sub_root); -+ -+ uksm_pages_unshared++; -+ return NULL; -+ -+get_page_out: -+ if (tree_rmap_item->page == rmap_item->page) -+ return NULL; -+ -+ if (get_tree_rmap_item_page(tree_rmap_item)) -+ return NULL; -+ -+ return tree_rmap_item; -+} -+ -+static void hold_anon_vma(struct rmap_item *rmap_item, -+ struct anon_vma *anon_vma) -+{ -+ rmap_item->anon_vma = anon_vma; -+ get_anon_vma(anon_vma); -+} -+ -+ -+/** -+ * stable_tree_append() - append a rmap_item to a stable node. Deduplication -+ * ratio statistics is done in this function. 
-+ * -+ */ -+static void stable_tree_append(struct rmap_item *rmap_item, -+ struct stable_node *stable_node, int logdedup) -+{ -+ struct node_vma *node_vma = NULL, *new_node_vma, *node_vma_cont = NULL; -+ unsigned long key = (unsigned long)rmap_item->slot; -+ unsigned long factor = rmap_item->slot->rung->step; -+ -+ BUG_ON(!stable_node); -+ rmap_item->address |= STABLE_FLAG; -+ -+ if (hlist_empty(&stable_node->hlist)) { -+ uksm_pages_shared++; -+ goto node_vma_new; -+ } else { -+ uksm_pages_sharing++; -+ } -+ -+ hlist_for_each_entry(node_vma, &stable_node->hlist, hlist) { -+ if (node_vma->key >= key) -+ break; -+ -+ if (logdedup) { -+ node_vma->slot->pages_bemerged += factor; -+ if (list_empty(&node_vma->slot->dedup_list)) -+ list_add(&node_vma->slot->dedup_list, -+ &vma_slot_dedup); -+ } -+ } -+ -+ if (node_vma) { -+ if (node_vma->key == key) { -+ node_vma_cont = hlist_entry_safe(node_vma->hlist.next, struct node_vma, hlist); -+ goto node_vma_ok; -+ } else if (node_vma->key > key) { -+ node_vma_cont = node_vma; -+ } -+ } -+ -+node_vma_new: -+ /* no same vma already in node, alloc a new node_vma */ -+ new_node_vma = alloc_node_vma(); -+ BUG_ON(!new_node_vma); -+ new_node_vma->head = stable_node; -+ new_node_vma->slot = rmap_item->slot; -+ -+ if (!node_vma) { -+ hlist_add_head(&new_node_vma->hlist, &stable_node->hlist); -+ } else if (node_vma->key != key) { -+ if (node_vma->key < key) -+ hlist_add_behind(&new_node_vma->hlist, &node_vma->hlist); -+ else { -+ hlist_add_before(&new_node_vma->hlist, -+ &node_vma->hlist); -+ } -+ -+ } -+ node_vma = new_node_vma; -+ -+node_vma_ok: /* ok, ready to add to the list */ -+ rmap_item->head = node_vma; -+ hlist_add_head(&rmap_item->hlist, &node_vma->rmap_hlist); -+ hold_anon_vma(rmap_item, rmap_item->slot->vma->anon_vma); -+ if (logdedup) { -+ rmap_item->slot->pages_merged++; -+ if (node_vma_cont) { -+ node_vma = node_vma_cont; -+ hlist_for_each_entry_continue(node_vma, hlist) { -+ node_vma->slot->pages_bemerged += factor; -+ if (list_empty(&node_vma->slot->dedup_list)) -+ list_add(&node_vma->slot->dedup_list, -+ &vma_slot_dedup); -+ } -+ } -+ } -+} -+ -+/* -+ * We use break_ksm to break COW on a ksm page: it's a stripped down -+ * -+ * if (get_user_pages(addr, 1, 1, 1, &page, NULL) == 1) -+ * put_page(page); -+ * -+ * but taking great care only to touch a ksm page, in a VM_MERGEABLE vma, -+ * in case the application has unmapped and remapped mm,addr meanwhile. -+ * Could a ksm page appear anywhere else? Actually yes, in a VM_PFNMAP -+ * mmap of /dev/mem or /dev/kmem, where we would not want to touch it. -+ */ -+static int break_ksm(struct vm_area_struct *vma, unsigned long addr) -+{ -+ struct page *page; -+ int ret = 0; -+ -+ do { -+ cond_resched(); -+ page = follow_page(vma, addr, FOLL_GET | FOLL_MIGRATION | FOLL_REMOTE); -+ if (IS_ERR_OR_NULL(page)) -+ break; -+ if (PageKsm(page)) { -+ ret = handle_mm_fault(vma, addr, -+ FAULT_FLAG_WRITE | FAULT_FLAG_REMOTE); -+ } else -+ ret = VM_FAULT_WRITE; -+ put_page(page); -+ } while (!(ret & (VM_FAULT_WRITE | VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV | VM_FAULT_OOM))); -+ /* -+ * We must loop because handle_mm_fault() may back out if there's -+ * any difficulty e.g. if pte accessed bit gets updated concurrently. -+ * -+ * VM_FAULT_WRITE is what we have been hoping for: it indicates that -+ * COW has been broken, even if the vma does not permit VM_WRITE; -+ * but note that a concurrent fault might break PageKsm for us. 
-+ * -+ * VM_FAULT_SIGBUS could occur if we race with truncation of the -+ * backing file, which also invalidates anonymous pages: that's -+ * okay, that truncation will have unmapped the PageKsm for us. -+ * -+ * VM_FAULT_OOM: at the time of writing (late July 2009), setting -+ * aside mem_cgroup limits, VM_FAULT_OOM would only be set if the -+ * current task has TIF_MEMDIE set, and will be OOM killed on return -+ * to user; and ksmd, having no mm, would never be chosen for that. -+ * -+ * But if the mm is in a limited mem_cgroup, then the fault may fail -+ * with VM_FAULT_OOM even if the current task is not TIF_MEMDIE; and -+ * even ksmd can fail in this way - though it's usually breaking ksm -+ * just to undo a merge it made a moment before, so unlikely to oom. -+ * -+ * That's a pity: we might therefore have more kernel pages allocated -+ * than we're counting as nodes in the stable tree; but uksm_do_scan -+ * will retry to break_cow on each pass, so should recover the page -+ * in due course. The important thing is to not let VM_MERGEABLE -+ * be cleared while any such pages might remain in the area. -+ */ -+ return (ret & VM_FAULT_OOM) ? -ENOMEM : 0; -+} -+ -+static void break_cow(struct rmap_item *rmap_item) -+{ -+ struct vm_area_struct *vma = rmap_item->slot->vma; -+ struct mm_struct *mm = vma->vm_mm; -+ unsigned long addr = get_rmap_addr(rmap_item); -+ -+ if (uksm_test_exit(mm)) -+ goto out; -+ -+ break_ksm(vma, addr); -+out: -+ return; -+} -+ -+/* -+ * Though it's very tempting to unmerge in_stable_tree(rmap_item)s rather -+ * than check every pte of a given vma, the locking doesn't quite work for -+ * that - an rmap_item is assigned to the stable tree after inserting ksm -+ * page and upping mmap_sem. Nor does it fit with the way we skip dup'ing -+ * rmap_items from parent to child at fork time (so as not to waste time -+ * if exit comes before the next scan reaches it). -+ * -+ * Similarly, although we'd like to remove rmap_items (so updating counts -+ * and freeing memory) when unmerging an area, it's easier to leave that -+ * to the next pass of ksmd - consider, for example, how ksmd might be -+ * in cmp_and_merge_page on one of the rmap_items we would be removing. 
-+ */ -+inline int unmerge_uksm_pages(struct vm_area_struct *vma, -+ unsigned long start, unsigned long end) -+{ -+ unsigned long addr; -+ int err = 0; -+ -+ for (addr = start; addr < end && !err; addr += PAGE_SIZE) { -+ if (uksm_test_exit(vma->vm_mm)) -+ break; -+ if (signal_pending(current)) -+ err = -ERESTARTSYS; -+ else -+ err = break_ksm(vma, addr); -+ } -+ return err; -+} -+ -+static inline void inc_uksm_pages_scanned(void) -+{ -+ u64 delta; -+ -+ -+ if (uksm_pages_scanned == U64_MAX) { -+ encode_benefit(); -+ -+ delta = uksm_pages_scanned >> pages_scanned_base; -+ -+ if (CAN_OVERFLOW_U64(pages_scanned_stored, delta)) { -+ pages_scanned_stored >>= 1; -+ delta >>= 1; -+ pages_scanned_base++; -+ } -+ -+ pages_scanned_stored += delta; -+ -+ uksm_pages_scanned = uksm_pages_scanned_last = 0; -+ } -+ -+ uksm_pages_scanned++; -+} -+ -+static inline int find_zero_page_hash(int strength, u32 hash) -+{ -+ return (zero_hash_table[strength] == hash); -+} -+ -+static -+int cmp_and_merge_zero_page(struct vm_area_struct *vma, struct page *page) -+{ -+ struct page *zero_page = empty_uksm_zero_page; -+ struct mm_struct *mm = vma->vm_mm; -+ pte_t orig_pte = __pte(0); -+ int err = -EFAULT; -+ -+ if (uksm_test_exit(mm)) -+ goto out; -+ -+ if (!trylock_page(page)) -+ goto out; -+ -+ if (!PageAnon(page)) -+ goto out_unlock; -+ -+ if (PageTransCompound(page)) { -+ err = split_huge_page(page); -+ if (err) -+ goto out_unlock; -+ } -+ -+ if (write_protect_page(vma, page, &orig_pte, 0) == 0) { -+ if (is_page_full_zero(page)) -+ err = replace_page(vma, page, zero_page, orig_pte); -+ } -+ -+out_unlock: -+ unlock_page(page); -+out: -+ return err; -+} -+ -+/* -+ * cmp_and_merge_page() - first see if page can be merged into the stable -+ * tree; if not, compare hash to previous and if it's the same, see if page -+ * can be inserted into the unstable tree, or merged with a page already there -+ * and both transferred to the stable tree. -+ * -+ * @page: the page that we are searching identical page to. -+ * @rmap_item: the reverse mapping into the virtual address of this page -+ */ -+static void cmp_and_merge_page(struct rmap_item *rmap_item, u32 hash) -+{ -+ struct rmap_item *tree_rmap_item; -+ struct page *page; -+ struct page *kpage = NULL; -+ u32 hash_max; -+ int err; -+ unsigned int success1, success2; -+ struct stable_node *snode; -+ int cmp; -+ struct rb_node *parent = NULL, **new; -+ -+ remove_rmap_item_from_tree(rmap_item); -+ page = rmap_item->page; -+ -+ /* We first start with searching the page inside the stable tree */ -+ kpage = stable_tree_search(rmap_item, hash); -+ if (kpage) { -+ err = try_to_merge_with_uksm_page(rmap_item, kpage, -+ hash); -+ if (!err) { -+ /* -+ * The page was successfully merged, add -+ * its rmap_item to the stable tree. -+ * page lock is needed because it's -+ * racing with try_to_unmap_ksm(), etc. -+ */ -+ lock_page(kpage); -+ snode = page_stable_node(kpage); -+ stable_tree_append(rmap_item, snode, 1); -+ unlock_page(kpage); -+ put_page(kpage); -+ return; /* success */ -+ } -+ put_page(kpage); -+ -+ /* -+ * if it's a collision and it has been search in sub-rbtree -+ * (hash_max != 0), we want to abort, because if it is -+ * successfully merged in unstable tree, the collision trends to -+ * happen again. 
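The zero-page handling above relies on a table of precomputed hashes of the empty page, one entry per hash strength, so a single hash comparison can flag a candidate full-zero page before the authoritative byte-level check and remap. A minimal standalone model of that idea follows; toy_hash() is a stand-in for the patch's page_hash(), and the table size is an arbitrary example rather than the real HASH_STRENGTH_MAX.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SZ		4096
#define MAX_STRENGTH	128	/* placeholder bound for the sketch */

/* Stand-in sampling hash: reads 'strength' bytes spread across the page. */
static uint32_t toy_hash(const unsigned char *page, int strength)
{
	uint32_t h = 2166136261u;
	int i;

	for (i = 0; i < strength; i++) {
		h ^= page[(i * 257) % PAGE_SZ];
		h *= 16777619u;
	}
	return h;
}

static uint32_t zero_hash[MAX_STRENGTH];	/* one entry per strength */

int main(void)
{
	unsigned char *zero = calloc(1, PAGE_SZ);
	unsigned char *page = calloc(1, PAGE_SZ);
	int s, strength = 64;

	if (!zero || !page)
		return 1;

	/* Precompute, as init_zeropage_hash_table() does for page_hash(). */
	for (s = 1; s < MAX_STRENGTH; s++)
		zero_hash[s] = toy_hash(zero, s);

	/* The hash match is only a hint; a full compare confirms it. */
	if (toy_hash(page, strength) == zero_hash[strength] &&
	    memcmp(page, zero, PAGE_SZ) == 0)
		puts("full-zero page: candidate for remapping to the zero page");

	free(zero);
	free(page);
	return 0;
}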
-+ */ -+ if (err == MERGE_ERR_COLLI && rmap_item->hash_max) -+ return; -+ } -+ -+ tree_rmap_item = -+ unstable_tree_search_insert(rmap_item, hash); -+ if (tree_rmap_item) { -+ err = try_to_merge_two_pages(rmap_item, tree_rmap_item, hash); -+ /* -+ * As soon as we merge this page, we want to remove the -+ * rmap_item of the page we have merged with from the unstable -+ * tree, and insert it instead as new node in the stable tree. -+ */ -+ if (!err) { -+ kpage = page; -+ remove_rmap_item_from_tree(tree_rmap_item); -+ lock_page(kpage); -+ snode = stable_tree_insert(&kpage, hash, -+ rmap_item, tree_rmap_item, -+ &success1, &success2); -+ -+ /* -+ * Do not log dedup for tree item, it's not counted as -+ * scanned in this round. -+ */ -+ if (success2) -+ stable_tree_append(tree_rmap_item, snode, 0); -+ -+ /* -+ * The order of these two stable append is important: -+ * we are scanning rmap_item. -+ */ -+ if (success1) -+ stable_tree_append(rmap_item, snode, 1); -+ -+ /* -+ * The original kpage may be unlocked inside -+ * stable_tree_insert() already. This page -+ * should be unlocked before doing -+ * break_cow(). -+ */ -+ unlock_page(kpage); -+ -+ if (!success1) -+ break_cow(rmap_item); -+ -+ if (!success2) -+ break_cow(tree_rmap_item); -+ -+ } else if (err == MERGE_ERR_COLLI) { -+ BUG_ON(tree_rmap_item->tree_node->count > 1); -+ -+ rmap_item_hash_max(tree_rmap_item, -+ tree_rmap_item->tree_node->hash); -+ -+ hash_max = rmap_item_hash_max(rmap_item, hash); -+ cmp = hash_cmp(hash_max, tree_rmap_item->hash_max); -+ parent = &tree_rmap_item->node; -+ if (cmp < 0) -+ new = &parent->rb_left; -+ else if (cmp > 0) -+ new = &parent->rb_right; -+ else -+ goto put_up_out; -+ -+ rmap_item->tree_node = tree_rmap_item->tree_node; -+ rmap_item->address |= UNSTABLE_FLAG; -+ rmap_item->hash_round = uksm_hash_round; -+ rb_link_node(&rmap_item->node, parent, new); -+ rb_insert_color(&rmap_item->node, -+ &tree_rmap_item->tree_node->sub_root); -+ rmap_item->tree_node->count++; -+ } else { -+ /* -+ * either one of the page has changed or they collide -+ * at the max hash, we consider them as ill items. 
-+ */ -+ remove_rmap_item_from_tree(tree_rmap_item); -+ } -+put_up_out: -+ put_page(tree_rmap_item->page); -+ up_read(&tree_rmap_item->slot->vma->vm_mm->mmap_sem); -+ } -+} -+ -+ -+ -+ -+static inline unsigned long get_pool_index(struct vma_slot *slot, -+ unsigned long index) -+{ -+ unsigned long pool_index; -+ -+ pool_index = (sizeof(struct rmap_list_entry *) * index) >> PAGE_SHIFT; -+ if (pool_index >= slot->pool_size) -+ BUG(); -+ return pool_index; -+} -+ -+static inline unsigned long index_page_offset(unsigned long index) -+{ -+ return offset_in_page(sizeof(struct rmap_list_entry *) * index); -+} -+ -+static inline -+struct rmap_list_entry *get_rmap_list_entry(struct vma_slot *slot, -+ unsigned long index, int need_alloc) -+{ -+ unsigned long pool_index; -+ struct page *page; -+ void *addr; -+ -+ -+ pool_index = get_pool_index(slot, index); -+ if (!slot->rmap_list_pool[pool_index]) { -+ if (!need_alloc) -+ return NULL; -+ -+ page = alloc_page(GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN); -+ if (!page) -+ return NULL; -+ -+ slot->rmap_list_pool[pool_index] = page; -+ } -+ -+ addr = kmap(slot->rmap_list_pool[pool_index]); -+ addr += index_page_offset(index); -+ -+ return addr; -+} -+ -+static inline void put_rmap_list_entry(struct vma_slot *slot, -+ unsigned long index) -+{ -+ unsigned long pool_index; -+ -+ pool_index = get_pool_index(slot, index); -+ BUG_ON(!slot->rmap_list_pool[pool_index]); -+ kunmap(slot->rmap_list_pool[pool_index]); -+} -+ -+static inline int entry_is_new(struct rmap_list_entry *entry) -+{ -+ return !entry->item; -+} -+ -+static inline unsigned long get_index_orig_addr(struct vma_slot *slot, -+ unsigned long index) -+{ -+ return slot->vma->vm_start + (index << PAGE_SHIFT); -+} -+ -+static inline unsigned long get_entry_address(struct rmap_list_entry *entry) -+{ -+ unsigned long addr; -+ -+ if (is_addr(entry->addr)) -+ addr = get_clean_addr(entry->addr); -+ else if (entry->item) -+ addr = get_rmap_addr(entry->item); -+ else -+ BUG(); -+ -+ return addr; -+} -+ -+static inline struct rmap_item *get_entry_item(struct rmap_list_entry *entry) -+{ -+ if (is_addr(entry->addr)) -+ return NULL; -+ -+ return entry->item; -+} -+ -+static inline void inc_rmap_list_pool_count(struct vma_slot *slot, -+ unsigned long index) -+{ -+ unsigned long pool_index; -+ -+ pool_index = get_pool_index(slot, index); -+ BUG_ON(!slot->rmap_list_pool[pool_index]); -+ slot->pool_counts[pool_index]++; -+} -+ -+static inline void dec_rmap_list_pool_count(struct vma_slot *slot, -+ unsigned long index) -+{ -+ unsigned long pool_index; -+ -+ pool_index = get_pool_index(slot, index); -+ BUG_ON(!slot->rmap_list_pool[pool_index]); -+ BUG_ON(!slot->pool_counts[pool_index]); -+ slot->pool_counts[pool_index]--; -+} -+ -+static inline int entry_has_rmap(struct rmap_list_entry *entry) -+{ -+ return !is_addr(entry->addr) && entry->item; -+} -+ -+static inline void swap_entries(struct rmap_list_entry *entry1, -+ unsigned long index1, -+ struct rmap_list_entry *entry2, -+ unsigned long index2) -+{ -+ struct rmap_list_entry tmp; -+ -+ /* swapping two new entries is meaningless */ -+ BUG_ON(entry_is_new(entry1) && entry_is_new(entry2)); -+ -+ tmp = *entry1; -+ *entry1 = *entry2; -+ *entry2 = tmp; -+ -+ if (entry_has_rmap(entry1)) -+ entry1->item->entry_index = index1; -+ -+ if (entry_has_rmap(entry2)) -+ entry2->item->entry_index = index2; -+ -+ if (entry_has_rmap(entry1) && !entry_has_rmap(entry2)) { -+ inc_rmap_list_pool_count(entry1->item->slot, index1); -+ dec_rmap_list_pool_count(entry1->item->slot, index2); -+ } 
else if (!entry_has_rmap(entry1) && entry_has_rmap(entry2)) { -+ inc_rmap_list_pool_count(entry2->item->slot, index2); -+ dec_rmap_list_pool_count(entry2->item->slot, index1); -+ } -+} -+ -+static inline void free_entry_item(struct rmap_list_entry *entry) -+{ -+ unsigned long index; -+ struct rmap_item *item; -+ -+ if (!is_addr(entry->addr)) { -+ BUG_ON(!entry->item); -+ item = entry->item; -+ entry->addr = get_rmap_addr(item); -+ set_is_addr(entry->addr); -+ index = item->entry_index; -+ remove_rmap_item_from_tree(item); -+ dec_rmap_list_pool_count(item->slot, index); -+ free_rmap_item(item); -+ } -+} -+ -+static inline int pool_entry_boundary(unsigned long index) -+{ -+ unsigned long linear_addr; -+ -+ linear_addr = sizeof(struct rmap_list_entry *) * index; -+ return index && !offset_in_page(linear_addr); -+} -+ -+static inline void try_free_last_pool(struct vma_slot *slot, -+ unsigned long index) -+{ -+ unsigned long pool_index; -+ -+ pool_index = get_pool_index(slot, index); -+ if (slot->rmap_list_pool[pool_index] && -+ !slot->pool_counts[pool_index]) { -+ __free_page(slot->rmap_list_pool[pool_index]); -+ slot->rmap_list_pool[pool_index] = NULL; -+ slot->flags |= UKSM_SLOT_NEED_SORT; -+ } -+ -+} -+ -+static inline unsigned long vma_item_index(struct vm_area_struct *vma, -+ struct rmap_item *item) -+{ -+ return (get_rmap_addr(item) - vma->vm_start) >> PAGE_SHIFT; -+} -+ -+static int within_same_pool(struct vma_slot *slot, -+ unsigned long i, unsigned long j) -+{ -+ unsigned long pool_i, pool_j; -+ -+ pool_i = get_pool_index(slot, i); -+ pool_j = get_pool_index(slot, j); -+ -+ return (pool_i == pool_j); -+} -+ -+static void sort_rmap_entry_list(struct vma_slot *slot) -+{ -+ unsigned long i, j; -+ struct rmap_list_entry *entry, *swap_entry; -+ -+ entry = get_rmap_list_entry(slot, 0, 0); -+ for (i = 0; i < slot->pages; ) { -+ -+ if (!entry) -+ goto skip_whole_pool; -+ -+ if (entry_is_new(entry)) -+ goto next_entry; -+ -+ if (is_addr(entry->addr)) { -+ entry->addr = 0; -+ goto next_entry; -+ } -+ -+ j = vma_item_index(slot->vma, entry->item); -+ if (j == i) -+ goto next_entry; -+ -+ if (within_same_pool(slot, i, j)) -+ swap_entry = entry + j - i; -+ else -+ swap_entry = get_rmap_list_entry(slot, j, 1); -+ -+ swap_entries(entry, i, swap_entry, j); -+ if (!within_same_pool(slot, i, j)) -+ put_rmap_list_entry(slot, j); -+ continue; -+ -+skip_whole_pool: -+ i += PAGE_SIZE / sizeof(*entry); -+ if (i < slot->pages) -+ entry = get_rmap_list_entry(slot, i, 0); -+ continue; -+ -+next_entry: -+ if (i >= slot->pages - 1 || -+ !within_same_pool(slot, i, i + 1)) { -+ put_rmap_list_entry(slot, i); -+ if (i + 1 < slot->pages) -+ entry = get_rmap_list_entry(slot, i + 1, 0); -+ } else -+ entry++; -+ i++; -+ continue; -+ } -+ -+ /* free empty pool entries which contain no rmap_item */ -+ /* CAN be simplied to based on only pool_counts when bug freed !!!!! 
*/ -+ for (i = 0; i < slot->pool_size; i++) { -+ unsigned char has_rmap; -+ void *addr; -+ -+ if (!slot->rmap_list_pool[i]) -+ continue; -+ -+ has_rmap = 0; -+ addr = kmap(slot->rmap_list_pool[i]); -+ BUG_ON(!addr); -+ for (j = 0; j < PAGE_SIZE / sizeof(*entry); j++) { -+ entry = (struct rmap_list_entry *)addr + j; -+ if (is_addr(entry->addr)) -+ continue; -+ if (!entry->item) -+ continue; -+ has_rmap = 1; -+ } -+ kunmap(slot->rmap_list_pool[i]); -+ if (!has_rmap) { -+ BUG_ON(slot->pool_counts[i]); -+ __free_page(slot->rmap_list_pool[i]); -+ slot->rmap_list_pool[i] = NULL; -+ } -+ } -+ -+ slot->flags &= ~UKSM_SLOT_NEED_SORT; -+} -+ -+/* -+ * vma_fully_scanned() - if all the pages in this slot have been scanned. -+ */ -+static inline int vma_fully_scanned(struct vma_slot *slot) -+{ -+ return slot->pages_scanned == slot->pages; -+} -+ -+/** -+ * get_next_rmap_item() - Get the next rmap_item in a vma_slot according to -+ * its random permutation. This function is embedded with the random -+ * permutation index management code. -+ */ -+static struct rmap_item *get_next_rmap_item(struct vma_slot *slot, u32 *hash) -+{ -+ unsigned long rand_range, addr, swap_index, scan_index; -+ struct rmap_item *item = NULL; -+ struct rmap_list_entry *scan_entry, *swap_entry = NULL; -+ struct page *page; -+ -+ scan_index = swap_index = slot->pages_scanned % slot->pages; -+ -+ if (pool_entry_boundary(scan_index)) -+ try_free_last_pool(slot, scan_index - 1); -+ -+ if (vma_fully_scanned(slot)) { -+ if (slot->flags & UKSM_SLOT_NEED_SORT) -+ slot->flags |= UKSM_SLOT_NEED_RERAND; -+ else -+ slot->flags &= ~UKSM_SLOT_NEED_RERAND; -+ if (slot->flags & UKSM_SLOT_NEED_SORT) -+ sort_rmap_entry_list(slot); -+ } -+ -+ scan_entry = get_rmap_list_entry(slot, scan_index, 1); -+ if (!scan_entry) -+ return NULL; -+ -+ if (entry_is_new(scan_entry)) { -+ scan_entry->addr = get_index_orig_addr(slot, scan_index); -+ set_is_addr(scan_entry->addr); -+ } -+ -+ if (slot->flags & UKSM_SLOT_NEED_RERAND) { -+ rand_range = slot->pages - scan_index; -+ BUG_ON(!rand_range); -+ swap_index = scan_index + (prandom_u32() % rand_range); -+ } -+ -+ if (swap_index != scan_index) { -+ swap_entry = get_rmap_list_entry(slot, swap_index, 1); -+ -+ if (!swap_entry) -+ return NULL; -+ -+ if (entry_is_new(swap_entry)) { -+ swap_entry->addr = get_index_orig_addr(slot, -+ swap_index); -+ set_is_addr(swap_entry->addr); -+ } -+ swap_entries(scan_entry, scan_index, swap_entry, swap_index); -+ } -+ -+ addr = get_entry_address(scan_entry); -+ item = get_entry_item(scan_entry); -+ BUG_ON(addr > slot->vma->vm_end || addr < slot->vma->vm_start); -+ -+ page = follow_page(slot->vma, addr, FOLL_GET); -+ if (IS_ERR_OR_NULL(page)) -+ goto nopage; -+ -+ if (!PageAnon(page)) -+ goto putpage; -+ -+ /*check is zero_page pfn or uksm_zero_page*/ -+ if ((page_to_pfn(page) == zero_pfn) -+ || (page_to_pfn(page) == uksm_zero_pfn)) -+ goto putpage; -+ -+ flush_anon_page(slot->vma, page, addr); -+ flush_dcache_page(page); -+ -+ -+ *hash = page_hash(page, hash_strength, 1); -+ inc_uksm_pages_scanned(); -+ /*if the page content all zero, re-map to zero-page*/ -+ if (find_zero_page_hash(hash_strength, *hash)) { -+ if (!cmp_and_merge_zero_page(slot->vma, page)) { -+ slot->pages_merged++; -+ -+ /* For full-zero pages, no need to create rmap item */ -+ goto putpage; -+ } else { -+ inc_rshash_neg(memcmp_cost / 2); -+ } -+ } -+ -+ if (!item) { -+ item = alloc_rmap_item(); -+ if (item) { -+ /* It has already been zeroed */ -+ item->slot = slot; -+ item->address = addr; -+ item->entry_index = 
scan_index; -+ scan_entry->item = item; -+ inc_rmap_list_pool_count(slot, scan_index); -+ } else -+ goto putpage; -+ } -+ -+ BUG_ON(item->slot != slot); -+ /* the page may have changed */ -+ item->page = page; -+ put_rmap_list_entry(slot, scan_index); -+ if (swap_entry) -+ put_rmap_list_entry(slot, swap_index); -+ return item; -+ -+putpage: -+ put_page(page); -+ page = NULL; -+nopage: -+ /* no page, store addr back and free rmap_item if possible */ -+ free_entry_item(scan_entry); -+ put_rmap_list_entry(slot, scan_index); -+ if (swap_entry) -+ put_rmap_list_entry(slot, swap_index); -+ return NULL; -+} -+ -+static inline int in_stable_tree(struct rmap_item *rmap_item) -+{ -+ return rmap_item->address & STABLE_FLAG; -+} -+ -+/** -+ * scan_vma_one_page() - scan the next page in a vma_slot. Called with -+ * mmap_sem locked. -+ */ -+static noinline void scan_vma_one_page(struct vma_slot *slot) -+{ -+ u32 hash; -+ struct mm_struct *mm; -+ struct rmap_item *rmap_item = NULL; -+ struct vm_area_struct *vma = slot->vma; -+ -+ mm = vma->vm_mm; -+ BUG_ON(!mm); -+ BUG_ON(!slot); -+ -+ rmap_item = get_next_rmap_item(slot, &hash); -+ if (!rmap_item) -+ goto out1; -+ -+ if (PageKsm(rmap_item->page) && in_stable_tree(rmap_item)) -+ goto out2; -+ -+ cmp_and_merge_page(rmap_item, hash); -+out2: -+ put_page(rmap_item->page); -+out1: -+ slot->pages_scanned++; -+ slot->this_sampled++; -+ if (slot->fully_scanned_round != fully_scanned_round) -+ scanned_virtual_pages++; -+ -+ if (vma_fully_scanned(slot)) -+ slot->fully_scanned_round = fully_scanned_round; -+} -+ -+static inline unsigned long rung_get_pages(struct scan_rung *rung) -+{ -+ struct slot_tree_node *node; -+ -+ if (!rung->vma_root.rnode) -+ return 0; -+ -+ node = container_of(rung->vma_root.rnode, struct slot_tree_node, snode); -+ -+ return node->size; -+} -+ -+#define RUNG_SAMPLED_MIN 3 -+ -+static inline -+void uksm_calc_rung_step(struct scan_rung *rung, -+ unsigned long page_time, unsigned long ratio) -+{ -+ unsigned long sampled, pages; -+ -+ /* will be fully scanned ? */ -+ if (!rung->cover_msecs) { -+ rung->step = 1; -+ return; -+ } -+ -+ sampled = rung->cover_msecs * (NSEC_PER_MSEC / TIME_RATIO_SCALE) -+ * ratio / page_time; -+ -+ /* -+ * Before we finsish a scan round and expensive per-round jobs, -+ * we need to have a chance to estimate the per page time. So -+ * the sampled number can not be too small. 
-+ */ -+ if (sampled < RUNG_SAMPLED_MIN) -+ sampled = RUNG_SAMPLED_MIN; -+ -+ pages = rung_get_pages(rung); -+ if (likely(pages > sampled)) -+ rung->step = pages / sampled; -+ else -+ rung->step = 1; -+} -+ -+static inline int step_need_recalc(struct scan_rung *rung) -+{ -+ unsigned long pages, stepmax; -+ -+ pages = rung_get_pages(rung); -+ stepmax = pages / RUNG_SAMPLED_MIN; -+ -+ return pages && (rung->step > pages || -+ (stepmax && rung->step > stepmax)); -+} -+ -+static inline -+void reset_current_scan(struct scan_rung *rung, int finished, int step_recalc) -+{ -+ struct vma_slot *slot; -+ -+ if (finished) -+ rung->flags |= UKSM_RUNG_ROUND_FINISHED; -+ -+ if (step_recalc || step_need_recalc(rung)) { -+ uksm_calc_rung_step(rung, uksm_ema_page_time, rung->cpu_ratio); -+ BUG_ON(step_need_recalc(rung)); -+ } -+ -+ slot_iter_index = prandom_u32() % rung->step; -+ BUG_ON(!rung->vma_root.rnode); -+ slot = sradix_tree_next(&rung->vma_root, NULL, 0, slot_iter); -+ BUG_ON(!slot); -+ -+ rung->current_scan = slot; -+ rung->current_offset = slot_iter_index; -+} -+ -+static inline struct sradix_tree_root *slot_get_root(struct vma_slot *slot) -+{ -+ return &slot->rung->vma_root; -+} -+ -+/* -+ * return if resetted. -+ */ -+static int advance_current_scan(struct scan_rung *rung) -+{ -+ unsigned short n; -+ struct vma_slot *slot, *next = NULL; -+ -+ BUG_ON(!rung->vma_root.num); -+ -+ slot = rung->current_scan; -+ n = (slot->pages - rung->current_offset) % rung->step; -+ slot_iter_index = rung->step - n; -+ next = sradix_tree_next(&rung->vma_root, slot->snode, -+ slot->sindex, slot_iter); -+ -+ if (next) { -+ rung->current_offset = slot_iter_index; -+ rung->current_scan = next; -+ return 0; -+ } else { -+ reset_current_scan(rung, 1, 0); -+ return 1; -+ } -+} -+ -+static inline void rung_rm_slot(struct vma_slot *slot) -+{ -+ struct scan_rung *rung = slot->rung; -+ struct sradix_tree_root *root; -+ -+ if (rung->current_scan == slot) -+ advance_current_scan(rung); -+ -+ root = slot_get_root(slot); -+ sradix_tree_delete_from_leaf(root, slot->snode, slot->sindex); -+ slot->snode = NULL; -+ if (step_need_recalc(rung)) { -+ uksm_calc_rung_step(rung, uksm_ema_page_time, rung->cpu_ratio); -+ BUG_ON(step_need_recalc(rung)); -+ } -+ -+ /* In case advance_current_scan loop back to this slot again */ -+ if (rung->vma_root.num && rung->current_scan == slot) -+ reset_current_scan(slot->rung, 1, 0); -+} -+ -+static inline void rung_add_new_slots(struct scan_rung *rung, -+ struct vma_slot **slots, unsigned long num) -+{ -+ int err; -+ struct vma_slot *slot; -+ unsigned long i; -+ struct sradix_tree_root *root = &rung->vma_root; -+ -+ err = sradix_tree_enter(root, (void **)slots, num); -+ BUG_ON(err); -+ -+ for (i = 0; i < num; i++) { -+ slot = slots[i]; -+ slot->rung = rung; -+ BUG_ON(vma_fully_scanned(slot)); -+ } -+ -+ if (rung->vma_root.num == num) -+ reset_current_scan(rung, 0, 1); -+} -+ -+static inline int rung_add_one_slot(struct scan_rung *rung, -+ struct vma_slot *slot) -+{ -+ int err; -+ -+ err = sradix_tree_enter(&rung->vma_root, (void **)&slot, 1); -+ if (err) -+ return err; -+ -+ slot->rung = rung; -+ if (rung->vma_root.num == 1) -+ reset_current_scan(rung, 0, 1); -+ -+ return 0; -+} -+ -+/* -+ * Return true if the slot is deleted from its rung. 
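uksm_calc_rung_step() above sizes the sampling step so that, at the rung's CPU ratio and the current per-page cost, the rung is still expected to be sampled at least RUNG_SAMPLED_MIN times within one evaluation interval. A standalone restatement of that arithmetic; TIME_RATIO_SCALE and every input number here are plausible placeholders, not values taken from the patch.

#include <stdio.h>

#define NSEC_PER_MSEC		1000000UL
#define TIME_RATIO_SCALE	1000UL	/* placeholder scale */
#define RUNG_SAMPLED_MIN	3UL

static unsigned long rung_step(unsigned long cover_msecs, unsigned long ratio,
			       unsigned long page_time_ns, unsigned long pages)
{
	/* pages we expect to sample within one evaluation interval */
	unsigned long sampled = cover_msecs * (NSEC_PER_MSEC / TIME_RATIO_SCALE)
				* ratio / page_time_ns;

	if (sampled < RUNG_SAMPLED_MIN)
		sampled = RUNG_SAMPLED_MIN;

	return pages > sampled ? pages / sampled : 1;
}

int main(void)
{
	/* 500 ms interval, ratio 200/1000 of a CPU, 2000 ns/page, 1M pages */
	printf("step = %lu\n", rung_step(500, 200, 2000, 1UL << 20));
	return 0;
}

With these example numbers roughly 50000 pages would be sampled per interval, so every 20th page is visited; a smaller rung simply degenerates to step 1.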
-+ */ -+static inline int vma_rung_enter(struct vma_slot *slot, struct scan_rung *rung) -+{ -+ struct scan_rung *old_rung = slot->rung; -+ int err; -+ -+ if (old_rung == rung) -+ return 0; -+ -+ rung_rm_slot(slot); -+ err = rung_add_one_slot(rung, slot); -+ if (err) { -+ err = rung_add_one_slot(old_rung, slot); -+ WARN_ON(err); /* OOPS, badly OOM, we lost this slot */ -+ } -+ -+ return 1; -+} -+ -+static inline int vma_rung_up(struct vma_slot *slot) -+{ -+ struct scan_rung *rung; -+ -+ rung = slot->rung; -+ if (slot->rung != &uksm_scan_ladder[SCAN_LADDER_SIZE-1]) -+ rung++; -+ -+ return vma_rung_enter(slot, rung); -+} -+ -+static inline int vma_rung_down(struct vma_slot *slot) -+{ -+ struct scan_rung *rung; -+ -+ rung = slot->rung; -+ if (slot->rung != &uksm_scan_ladder[0]) -+ rung--; -+ -+ return vma_rung_enter(slot, rung); -+} -+ -+/** -+ * cal_dedup_ratio() - Calculate the deduplication ratio for this slot. -+ */ -+static unsigned long cal_dedup_ratio(struct vma_slot *slot) -+{ -+ unsigned long ret; -+ unsigned long pages; -+ -+ pages = slot->this_sampled; -+ if (!pages) -+ return 0; -+ -+ BUG_ON(slot->pages_scanned == slot->last_scanned); -+ -+ ret = slot->pages_merged; -+ -+ /* Thrashing area filtering */ -+ if (ret && uksm_thrash_threshold) { -+ if (slot->pages_cowed * 100 / slot->pages_merged -+ > uksm_thrash_threshold) { -+ ret = 0; -+ } else { -+ ret = slot->pages_merged - slot->pages_cowed; -+ } -+ } -+ -+ return ret * 100 / pages; -+} -+ -+/** -+ * cal_dedup_ratio() - Calculate the deduplication ratio for this slot. -+ */ -+static unsigned long cal_dedup_ratio_old(struct vma_slot *slot) -+{ -+ unsigned long ret; -+ unsigned long pages; -+ -+ pages = slot->pages; -+ if (!pages) -+ return 0; -+ -+ ret = slot->pages_bemerged; -+ -+ /* Thrashing area filtering */ -+ if (ret && uksm_thrash_threshold) { -+ if (slot->pages_cowed * 100 / slot->pages_bemerged -+ > uksm_thrash_threshold) { -+ ret = 0; -+ } else { -+ ret = slot->pages_bemerged - slot->pages_cowed; -+ } -+ } -+ -+ return ret * 100 / pages; -+} -+ -+/** -+ * stable_node_reinsert() - When the hash_strength has been adjusted, the -+ * stable tree need to be restructured, this is the function re-inserting the -+ * stable node. 
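cal_dedup_ratio() above reduces to: merged pages minus the ones that were COW-broken again, as a percentage of the pages sampled this round, and zero if the area looks like it is thrashing. A standalone restatement with example numbers; the threshold value is purely illustrative.

#include <stdio.h>

static unsigned long dedup_ratio(unsigned long merged, unsigned long cowed,
				 unsigned long sampled,
				 unsigned long thrash_threshold)
{
	unsigned long ret = merged;

	if (!sampled)
		return 0;

	/* Thrashing filter: if too large a share of the merged pages was
	 * written to (and therefore unmerged) again, the area is not worth
	 * promoting, so report 0. */
	if (ret && thrash_threshold) {
		if (cowed * 100 / merged > thrash_threshold)
			ret = 0;
		else
			ret = merged - cowed;
	}

	return ret * 100 / sampled;
}

int main(void)
{
	/* 400 of 1000 sampled pages merged, 50 broken again, threshold 20% */
	printf("%lu%%\n", dedup_ratio(400, 50, 1000, 20));
	return 0;
}

The result (35% here) is what judge_slot() later compares against uksm_abundant_threshold to decide whether the slot climbs or descends the scan ladder.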
-+ */ -+static inline void stable_node_reinsert(struct stable_node *new_node, -+ struct page *page, -+ struct rb_root *root_treep, -+ struct list_head *tree_node_listp, -+ u32 hash) -+{ -+ struct rb_node **new = &root_treep->rb_node; -+ struct rb_node *parent = NULL; -+ struct stable_node *stable_node; -+ struct tree_node *tree_node; -+ struct page *tree_page; -+ int cmp; -+ -+ while (*new) { -+ int cmp; -+ -+ tree_node = rb_entry(*new, struct tree_node, node); -+ -+ cmp = hash_cmp(hash, tree_node->hash); -+ -+ if (cmp < 0) { -+ parent = *new; -+ new = &parent->rb_left; -+ } else if (cmp > 0) { -+ parent = *new; -+ new = &parent->rb_right; -+ } else -+ break; -+ } -+ -+ if (*new) { -+ /* find a stable tree node with same first level hash value */ -+ stable_node_hash_max(new_node, page, hash); -+ if (tree_node->count == 1) { -+ stable_node = rb_entry(tree_node->sub_root.rb_node, -+ struct stable_node, node); -+ tree_page = get_uksm_page(stable_node, 1, 0); -+ if (tree_page) { -+ stable_node_hash_max(stable_node, -+ tree_page, hash); -+ put_page(tree_page); -+ -+ /* prepare for stable node insertion */ -+ -+ cmp = hash_cmp(new_node->hash_max, -+ stable_node->hash_max); -+ parent = &stable_node->node; -+ if (cmp < 0) -+ new = &parent->rb_left; -+ else if (cmp > 0) -+ new = &parent->rb_right; -+ else -+ goto failed; -+ -+ goto add_node; -+ } else { -+ /* the only stable_node deleted, the tree node -+ * was not deleted. -+ */ -+ goto tree_node_reuse; -+ } -+ } -+ -+ /* well, search the collision subtree */ -+ new = &tree_node->sub_root.rb_node; -+ parent = NULL; -+ BUG_ON(!*new); -+ while (*new) { -+ int cmp; -+ -+ stable_node = rb_entry(*new, struct stable_node, node); -+ -+ cmp = hash_cmp(new_node->hash_max, -+ stable_node->hash_max); -+ -+ if (cmp < 0) { -+ parent = *new; -+ new = &parent->rb_left; -+ } else if (cmp > 0) { -+ parent = *new; -+ new = &parent->rb_right; -+ } else { -+ /* oh, no, still a collision */ -+ goto failed; -+ } -+ } -+ -+ goto add_node; -+ } -+ -+ /* no tree node found */ -+ tree_node = alloc_tree_node(tree_node_listp); -+ if (!tree_node) { -+ pr_err("UKSM: memory allocation error!\n"); -+ goto failed; -+ } else { -+ tree_node->hash = hash; -+ rb_link_node(&tree_node->node, parent, new); -+ rb_insert_color(&tree_node->node, root_treep); -+ -+tree_node_reuse: -+ /* prepare for stable node insertion */ -+ parent = NULL; -+ new = &tree_node->sub_root.rb_node; -+ } -+ -+add_node: -+ rb_link_node(&new_node->node, parent, new); -+ rb_insert_color(&new_node->node, &tree_node->sub_root); -+ new_node->tree_node = tree_node; -+ tree_node->count++; -+ return; -+ -+failed: -+ /* This can only happen when two nodes have collided -+ * in two levels. -+ */ -+ new_node->tree_node = NULL; -+ return; -+} -+ -+static inline void free_all_tree_nodes(struct list_head *list) -+{ -+ struct tree_node *node, *tmp; -+ -+ list_for_each_entry_safe(node, tmp, list, all_list) { -+ free_tree_node(node); -+ } -+} -+ -+/** -+ * stable_tree_delta_hash() - Delta hash the stable tree from previous hash -+ * strength to the current hash_strength. It re-structures the hole tree. 
-+ */ -+static inline void stable_tree_delta_hash(u32 prev_hash_strength) -+{ -+ struct stable_node *node, *tmp; -+ struct rb_root *root_new_treep; -+ struct list_head *new_tree_node_listp; -+ -+ stable_tree_index = (stable_tree_index + 1) % 2; -+ root_new_treep = &root_stable_tree[stable_tree_index]; -+ new_tree_node_listp = &stable_tree_node_list[stable_tree_index]; -+ *root_new_treep = RB_ROOT; -+ BUG_ON(!list_empty(new_tree_node_listp)); -+ -+ /* -+ * we need to be safe, the node could be removed by get_uksm_page() -+ */ -+ list_for_each_entry_safe(node, tmp, &stable_node_list, all_list) { -+ void *addr; -+ struct page *node_page; -+ u32 hash; -+ -+ /* -+ * We are completely re-structuring the stable nodes to a new -+ * stable tree. We don't want to touch the old tree unlinks and -+ * old tree_nodes. The old tree_nodes will be freed at once. -+ */ -+ node_page = get_uksm_page(node, 0, 0); -+ if (!node_page) -+ continue; -+ -+ if (node->tree_node) { -+ hash = node->tree_node->hash; -+ -+ addr = kmap_atomic(node_page); -+ -+ hash = delta_hash(addr, prev_hash_strength, -+ hash_strength, hash); -+ kunmap_atomic(addr); -+ } else { -+ /* -+ *it was not inserted to rbtree due to collision in last -+ *round scan. -+ */ -+ hash = page_hash(node_page, hash_strength, 0); -+ } -+ -+ stable_node_reinsert(node, node_page, root_new_treep, -+ new_tree_node_listp, hash); -+ put_page(node_page); -+ } -+ -+ root_stable_treep = root_new_treep; -+ free_all_tree_nodes(stable_tree_node_listp); -+ BUG_ON(!list_empty(stable_tree_node_listp)); -+ stable_tree_node_listp = new_tree_node_listp; -+} -+ -+static inline void inc_hash_strength(unsigned long delta) -+{ -+ hash_strength += 1 << delta; -+ if (hash_strength > HASH_STRENGTH_MAX) -+ hash_strength = HASH_STRENGTH_MAX; -+} -+ -+static inline void dec_hash_strength(unsigned long delta) -+{ -+ unsigned long change = 1 << delta; -+ -+ if (hash_strength <= change + 1) -+ hash_strength = 1; -+ else -+ hash_strength -= change; -+} -+ -+static inline void inc_hash_strength_delta(void) -+{ -+ hash_strength_delta++; -+ if (hash_strength_delta > HASH_STRENGTH_DELTA_MAX) -+ hash_strength_delta = HASH_STRENGTH_DELTA_MAX; -+} -+ -+static inline unsigned long get_current_neg_ratio(void) -+{ -+ u64 pos = benefit.pos; -+ u64 neg = benefit.neg; -+ -+ if (!neg) -+ return 0; -+ -+ if (!pos || neg > pos) -+ return 100; -+ -+ if (neg > div64_u64(U64_MAX, 100)) -+ pos = div64_u64(pos, 100); -+ else -+ neg *= 100; -+ -+ return div64_u64(neg, pos); -+} -+ -+static inline unsigned long get_current_benefit(void) -+{ -+ u64 pos = benefit.pos; -+ u64 neg = benefit.neg; -+ u64 scanned = benefit.scanned; -+ -+ if (neg > pos) -+ return 0; -+ -+ return div64_u64((pos - neg), scanned); -+} -+ -+static inline int judge_rshash_direction(void) -+{ -+ u64 current_neg_ratio, stable_benefit; -+ u64 current_benefit, delta = 0; -+ int ret = STILL; -+ -+ /* -+ * Try to probe a value after the boot, and in case the system -+ * are still for a long time. 
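inc_hash_strength()/dec_hash_strength() above move the sampling strength by 1 << hash_strength_delta, and the delta keeps growing while the search keeps moving in the same direction, so repeated moves accelerate until a direction change resets them. A small standalone model of that stepping; HASH_STRENGTH_MAX and HASH_STRENGTH_DELTA_MAX are placeholders, not the patch's values.

#include <stdio.h>

#define HASH_STRENGTH_MAX	1024UL	/* placeholder upper bound */
#define HASH_STRENGTH_DELTA_MAX	5UL	/* placeholder cap on the shift */

static unsigned long strength = 128, delta;

static void go_up(void)
{
	strength += 1UL << delta;
	if (strength > HASH_STRENGTH_MAX)
		strength = HASH_STRENGTH_MAX;
	if (++delta > HASH_STRENGTH_DELTA_MAX)
		delta = HASH_STRENGTH_DELTA_MAX;
}

static void go_down(void)
{
	unsigned long change = 1UL << delta;

	strength = (strength <= change + 1) ? 1 : strength - change;
	if (++delta > HASH_STRENGTH_DELTA_MAX)
		delta = HASH_STRENGTH_DELTA_MAX;
}

int main(void)
{
	int i;

	for (i = 0; i < 4; i++) {	/* consecutive moves step by 1, 2, 4, 8 */
		go_up();
		printf("strength=%lu\n", strength);
	}

	delta = 0;			/* a direction change resets the step,
					 * as rshash_adjust() does */
	go_down();
	printf("strength=%lu\n", strength);
	return 0;
}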
-+ */ -+ if ((fully_scanned_round & 0xFFULL) == 10) { -+ ret = OBSCURE; -+ goto out; -+ } -+ -+ current_neg_ratio = get_current_neg_ratio(); -+ -+ if (current_neg_ratio == 0) { -+ rshash_neg_cont_zero++; -+ if (rshash_neg_cont_zero > 2) -+ return GO_DOWN; -+ else -+ return STILL; -+ } -+ rshash_neg_cont_zero = 0; -+ -+ if (current_neg_ratio > 90) { -+ ret = GO_UP; -+ goto out; -+ } -+ -+ current_benefit = get_current_benefit(); -+ stable_benefit = rshash_state.stable_benefit; -+ -+ if (!stable_benefit) { -+ ret = OBSCURE; -+ goto out; -+ } -+ -+ if (current_benefit > stable_benefit) -+ delta = current_benefit - stable_benefit; -+ else if (current_benefit < stable_benefit) -+ delta = stable_benefit - current_benefit; -+ -+ delta = div64_u64(100 * delta, stable_benefit); -+ -+ if (delta > 50) { -+ rshash_cont_obscure++; -+ if (rshash_cont_obscure > 2) -+ return OBSCURE; -+ else -+ return STILL; -+ } -+ -+out: -+ rshash_cont_obscure = 0; -+ return ret; -+} -+ -+/** -+ * rshash_adjust() - The main function to control the random sampling state -+ * machine for hash strength adapting. -+ * -+ * return true if hash_strength has changed. -+ */ -+static inline int rshash_adjust(void) -+{ -+ unsigned long prev_hash_strength = hash_strength; -+ -+ if (!encode_benefit()) -+ return 0; -+ -+ switch (rshash_state.state) { -+ case RSHASH_STILL: -+ switch (judge_rshash_direction()) { -+ case GO_UP: -+ if (rshash_state.pre_direct == GO_DOWN) -+ hash_strength_delta = 0; -+ -+ inc_hash_strength(hash_strength_delta); -+ inc_hash_strength_delta(); -+ rshash_state.stable_benefit = get_current_benefit(); -+ rshash_state.pre_direct = GO_UP; -+ break; -+ -+ case GO_DOWN: -+ if (rshash_state.pre_direct == GO_UP) -+ hash_strength_delta = 0; -+ -+ dec_hash_strength(hash_strength_delta); -+ inc_hash_strength_delta(); -+ rshash_state.stable_benefit = get_current_benefit(); -+ rshash_state.pre_direct = GO_DOWN; -+ break; -+ -+ case OBSCURE: -+ rshash_state.stable_point = hash_strength; -+ rshash_state.turn_point_down = hash_strength; -+ rshash_state.turn_point_up = hash_strength; -+ rshash_state.turn_benefit_down = get_current_benefit(); -+ rshash_state.turn_benefit_up = get_current_benefit(); -+ rshash_state.lookup_window_index = 0; -+ rshash_state.state = RSHASH_TRYDOWN; -+ dec_hash_strength(hash_strength_delta); -+ inc_hash_strength_delta(); -+ break; -+ -+ case STILL: -+ break; -+ default: -+ BUG(); -+ } -+ break; -+ -+ case RSHASH_TRYDOWN: -+ if (rshash_state.lookup_window_index++ % 5 == 0) -+ rshash_state.below_count = 0; -+ -+ if (get_current_benefit() < rshash_state.stable_benefit) -+ rshash_state.below_count++; -+ else if (get_current_benefit() > -+ rshash_state.turn_benefit_down) { -+ rshash_state.turn_point_down = hash_strength; -+ rshash_state.turn_benefit_down = get_current_benefit(); -+ } -+ -+ if (rshash_state.below_count >= 3 || -+ judge_rshash_direction() == GO_UP || -+ hash_strength == 1) { -+ hash_strength = rshash_state.stable_point; -+ hash_strength_delta = 0; -+ inc_hash_strength(hash_strength_delta); -+ inc_hash_strength_delta(); -+ rshash_state.lookup_window_index = 0; -+ rshash_state.state = RSHASH_TRYUP; -+ hash_strength_delta = 0; -+ } else { -+ dec_hash_strength(hash_strength_delta); -+ inc_hash_strength_delta(); -+ } -+ break; -+ -+ case RSHASH_TRYUP: -+ if (rshash_state.lookup_window_index++ % 5 == 0) -+ rshash_state.below_count = 0; -+ -+ if (get_current_benefit() < rshash_state.turn_benefit_down) -+ rshash_state.below_count++; -+ else if (get_current_benefit() > 
rshash_state.turn_benefit_up) { -+ rshash_state.turn_point_up = hash_strength; -+ rshash_state.turn_benefit_up = get_current_benefit(); -+ } -+ -+ if (rshash_state.below_count >= 3 || -+ judge_rshash_direction() == GO_DOWN || -+ hash_strength == HASH_STRENGTH_MAX) { -+ hash_strength = rshash_state.turn_benefit_up > -+ rshash_state.turn_benefit_down ? -+ rshash_state.turn_point_up : -+ rshash_state.turn_point_down; -+ -+ rshash_state.state = RSHASH_PRE_STILL; -+ } else { -+ inc_hash_strength(hash_strength_delta); -+ inc_hash_strength_delta(); -+ } -+ -+ break; -+ -+ case RSHASH_NEW: -+ case RSHASH_PRE_STILL: -+ rshash_state.stable_benefit = get_current_benefit(); -+ rshash_state.state = RSHASH_STILL; -+ hash_strength_delta = 0; -+ break; -+ default: -+ BUG(); -+ } -+ -+ /* rshash_neg = rshash_pos = 0; */ -+ reset_benefit(); -+ -+ if (prev_hash_strength != hash_strength) -+ stable_tree_delta_hash(prev_hash_strength); -+ -+ return prev_hash_strength != hash_strength; -+} -+ -+/** -+ * round_update_ladder() - The main function to do update of all the -+ * adjustments whenever a scan round is finished. -+ */ -+static noinline void round_update_ladder(void) -+{ -+ int i; -+ unsigned long dedup; -+ struct vma_slot *slot, *tmp_slot; -+ -+ for (i = 0; i < SCAN_LADDER_SIZE; i++) -+ uksm_scan_ladder[i].flags &= ~UKSM_RUNG_ROUND_FINISHED; -+ -+ list_for_each_entry_safe(slot, tmp_slot, &vma_slot_dedup, dedup_list) { -+ -+ /* slot may be rung_rm_slot() when mm exits */ -+ if (slot->snode) { -+ dedup = cal_dedup_ratio_old(slot); -+ if (dedup && dedup >= uksm_abundant_threshold) -+ vma_rung_up(slot); -+ } -+ -+ slot->pages_bemerged = 0; -+ slot->pages_cowed = 0; -+ -+ list_del_init(&slot->dedup_list); -+ } -+} -+ -+static void uksm_del_vma_slot(struct vma_slot *slot) -+{ -+ int i, j; -+ struct rmap_list_entry *entry; -+ -+ if (slot->snode) { -+ /* -+ * In case it just failed when entering the rung, it's not -+ * necessary. 
-+ */ -+ rung_rm_slot(slot); -+ } -+ -+ if (!list_empty(&slot->dedup_list)) -+ list_del(&slot->dedup_list); -+ -+ if (!slot->rmap_list_pool || !slot->pool_counts) { -+ /* In case it OOMed in uksm_vma_enter() */ -+ goto out; -+ } -+ -+ for (i = 0; i < slot->pool_size; i++) { -+ void *addr; -+ -+ if (!slot->rmap_list_pool[i]) -+ continue; -+ -+ addr = kmap(slot->rmap_list_pool[i]); -+ for (j = 0; j < PAGE_SIZE / sizeof(*entry); j++) { -+ entry = (struct rmap_list_entry *)addr + j; -+ if (is_addr(entry->addr)) -+ continue; -+ if (!entry->item) -+ continue; -+ -+ remove_rmap_item_from_tree(entry->item); -+ free_rmap_item(entry->item); -+ slot->pool_counts[i]--; -+ } -+ BUG_ON(slot->pool_counts[i]); -+ kunmap(slot->rmap_list_pool[i]); -+ __free_page(slot->rmap_list_pool[i]); -+ } -+ kfree(slot->rmap_list_pool); -+ kfree(slot->pool_counts); -+ -+out: -+ slot->rung = NULL; -+ if (slot->flags & UKSM_SLOT_IN_UKSM) { -+ BUG_ON(uksm_pages_total < slot->pages); -+ uksm_pages_total -= slot->pages; -+ } -+ -+ if (slot->fully_scanned_round == fully_scanned_round) -+ scanned_virtual_pages -= slot->pages; -+ else -+ scanned_virtual_pages -= slot->pages_scanned; -+ free_vma_slot(slot); -+} -+ -+ -+#define SPIN_LOCK_PERIOD 32 -+static struct vma_slot *cleanup_slots[SPIN_LOCK_PERIOD]; -+static inline void cleanup_vma_slots(void) -+{ -+ struct vma_slot *slot; -+ int i; -+ -+ i = 0; -+ spin_lock(&vma_slot_list_lock); -+ while (!list_empty(&vma_slot_del)) { -+ slot = list_entry(vma_slot_del.next, -+ struct vma_slot, slot_list); -+ list_del(&slot->slot_list); -+ cleanup_slots[i++] = slot; -+ if (i == SPIN_LOCK_PERIOD) { -+ spin_unlock(&vma_slot_list_lock); -+ while (--i >= 0) -+ uksm_del_vma_slot(cleanup_slots[i]); -+ i = 0; -+ spin_lock(&vma_slot_list_lock); -+ } -+ } -+ spin_unlock(&vma_slot_list_lock); -+ -+ while (--i >= 0) -+ uksm_del_vma_slot(cleanup_slots[i]); -+} -+ -+/* -+ * Expotional moving average formula -+ */ -+static inline unsigned long ema(unsigned long curr, unsigned long last_ema) -+{ -+ /* -+ * For a very high burst, even the ema cannot work well, a false very -+ * high per-page time estimation can result in feedback in very high -+ * overhead of context switch and rung update -- this will then lead -+ * to higher per-paper time, this may not converge. -+ * -+ * Instead, we try to approach this value in a binary manner. -+ */ -+ if (curr > last_ema * 10) -+ return last_ema * 2; -+ -+ return (EMA_ALPHA * curr + (100 - EMA_ALPHA) * last_ema) / 100; -+} -+ -+/* -+ * convert cpu ratio in 1/TIME_RATIO_SCALE configured by user to -+ * nanoseconds based on current uksm_sleep_jiffies. -+ */ -+static inline unsigned long cpu_ratio_to_nsec(unsigned int ratio) -+{ -+ return NSEC_PER_USEC * jiffies_to_usecs(uksm_sleep_jiffies) / -+ (TIME_RATIO_SCALE - ratio) * ratio; -+} -+ -+ -+static inline unsigned long rung_real_ratio(int cpu_time_ratio) -+{ -+ unsigned long ret; -+ -+ BUG_ON(!cpu_time_ratio); -+ -+ if (cpu_time_ratio > 0) -+ ret = cpu_time_ratio; -+ else -+ ret = (unsigned long)(-cpu_time_ratio) * -+ uksm_max_cpu_percentage / 100UL; -+ -+ return ret ? 
ret : 1; -+} -+ -+static noinline void uksm_calc_scan_pages(void) -+{ -+ struct scan_rung *ladder = uksm_scan_ladder; -+ unsigned long sleep_usecs, nsecs; -+ unsigned long ratio; -+ int i; -+ unsigned long per_page; -+ -+ if (uksm_ema_page_time > 100000 || -+ (((unsigned long) uksm_eval_round & (256UL - 1)) == 0UL)) -+ uksm_ema_page_time = UKSM_PAGE_TIME_DEFAULT; -+ -+ per_page = uksm_ema_page_time; -+ BUG_ON(!per_page); -+ -+ /* -+ * For every 8 eval round, we try to probe a uksm_sleep_jiffies value -+ * based on saved user input. -+ */ -+ if (((unsigned long) uksm_eval_round & (8UL - 1)) == 0UL) -+ uksm_sleep_jiffies = uksm_sleep_saved; -+ -+ /* We require a rung scan at least 1 page in a period. */ -+ nsecs = per_page; -+ ratio = rung_real_ratio(ladder[0].cpu_ratio); -+ if (cpu_ratio_to_nsec(ratio) < nsecs) { -+ sleep_usecs = nsecs * (TIME_RATIO_SCALE - ratio) / ratio -+ / NSEC_PER_USEC; -+ uksm_sleep_jiffies = usecs_to_jiffies(sleep_usecs) + 1; -+ } -+ -+ for (i = 0; i < SCAN_LADDER_SIZE; i++) { -+ ratio = rung_real_ratio(ladder[i].cpu_ratio); -+ ladder[i].pages_to_scan = cpu_ratio_to_nsec(ratio) / -+ per_page; -+ BUG_ON(!ladder[i].pages_to_scan); -+ uksm_calc_rung_step(&ladder[i], per_page, ratio); -+ } -+} -+ -+/* -+ * From the scan time of this round (ns) to next expected min sleep time -+ * (ms), be careful of the possible overflows. ratio is taken from -+ * rung_real_ratio() -+ */ -+static inline -+unsigned int scan_time_to_sleep(unsigned long long scan_time, unsigned long ratio) -+{ -+ scan_time >>= 20; /* to msec level now */ -+ BUG_ON(scan_time > (ULONG_MAX / TIME_RATIO_SCALE)); -+ -+ return (unsigned int) ((unsigned long) scan_time * -+ (TIME_RATIO_SCALE - ratio) / ratio); -+} -+ -+#define __round_mask(x, y) ((__typeof__(x))((y)-1)) -+#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) -+ -+static void uksm_vma_enter(struct vma_slot **slots, unsigned long num) -+{ -+ struct scan_rung *rung; -+ -+ rung = &uksm_scan_ladder[0]; -+ rung_add_new_slots(rung, slots, num); -+} -+ -+static struct vma_slot *batch_slots[SLOT_TREE_NODE_STORE_SIZE]; -+ -+static void uksm_enter_all_slots(void) -+{ -+ struct vma_slot *slot; -+ unsigned long index; -+ struct list_head empty_vma_list; -+ int i; -+ -+ i = 0; -+ index = 0; -+ INIT_LIST_HEAD(&empty_vma_list); -+ -+ spin_lock(&vma_slot_list_lock); -+ while (!list_empty(&vma_slot_new)) { -+ slot = list_entry(vma_slot_new.next, -+ struct vma_slot, slot_list); -+ -+ if (!slot->vma->anon_vma) { -+ list_move(&slot->slot_list, &empty_vma_list); -+ } else if (vma_can_enter(slot->vma)) { -+ batch_slots[index++] = slot; -+ list_del_init(&slot->slot_list); -+ } else { -+ list_move(&slot->slot_list, &vma_slot_noadd); -+ } -+ -+ if (++i == SPIN_LOCK_PERIOD || -+ (index && !(index % SLOT_TREE_NODE_STORE_SIZE))) { -+ spin_unlock(&vma_slot_list_lock); -+ -+ if (index && !(index % SLOT_TREE_NODE_STORE_SIZE)) { -+ uksm_vma_enter(batch_slots, index); -+ index = 0; -+ } -+ i = 0; -+ cond_resched(); -+ spin_lock(&vma_slot_list_lock); -+ } -+ } -+ -+ list_splice(&empty_vma_list, &vma_slot_new); -+ -+ spin_unlock(&vma_slot_list_lock); -+ -+ if (index) -+ uksm_vma_enter(batch_slots, index); -+ -+} -+ -+static inline int rung_round_finished(struct scan_rung *rung) -+{ -+ return rung->flags & UKSM_RUNG_ROUND_FINISHED; -+} -+ -+static inline void judge_slot(struct vma_slot *slot) -+{ -+ struct scan_rung *rung = slot->rung; -+ unsigned long dedup; -+ int deleted; -+ -+ dedup = cal_dedup_ratio(slot); -+ if (vma_fully_scanned(slot) && uksm_thrash_threshold) -+ deleted = 
vma_rung_enter(slot, &uksm_scan_ladder[0]); -+ else if (dedup && dedup >= uksm_abundant_threshold) -+ deleted = vma_rung_up(slot); -+ else -+ deleted = vma_rung_down(slot); -+ -+ slot->pages_merged = 0; -+ slot->pages_cowed = 0; -+ slot->this_sampled = 0; -+ -+ if (vma_fully_scanned(slot)) -+ slot->pages_scanned = 0; -+ -+ slot->last_scanned = slot->pages_scanned; -+ -+ /* If its deleted in above, then rung was already advanced. */ -+ if (!deleted) -+ advance_current_scan(rung); -+} -+ -+ -+static inline int hash_round_finished(void) -+{ -+ if (scanned_virtual_pages > (uksm_pages_total >> 2)) { -+ scanned_virtual_pages = 0; -+ if (uksm_pages_scanned) -+ fully_scanned_round++; -+ -+ return 1; -+ } else { -+ return 0; -+ } -+} -+ -+#define UKSM_MMSEM_BATCH 5 -+#define BUSY_RETRY 100 -+ -+/** -+ * uksm_do_scan() - the main worker function. -+ */ -+static noinline void uksm_do_scan(void) -+{ -+ struct vma_slot *slot, *iter; -+ struct mm_struct *busy_mm; -+ unsigned char round_finished, all_rungs_emtpy; -+ int i, err, mmsem_batch; -+ unsigned long pcost; -+ long long delta_exec; -+ unsigned long vpages, max_cpu_ratio; -+ unsigned long long start_time, end_time, scan_time; -+ unsigned int expected_jiffies; -+ -+ might_sleep(); -+ -+ vpages = 0; -+ -+ start_time = task_sched_runtime(current); -+ max_cpu_ratio = 0; -+ mmsem_batch = 0; -+ -+ for (i = 0; i < SCAN_LADDER_SIZE;) { -+ struct scan_rung *rung = &uksm_scan_ladder[i]; -+ unsigned long ratio; -+ int busy_retry; -+ -+ if (!rung->pages_to_scan) { -+ i++; -+ continue; -+ } -+ -+ if (!rung->vma_root.num) { -+ rung->pages_to_scan = 0; -+ i++; -+ continue; -+ } -+ -+ ratio = rung_real_ratio(rung->cpu_ratio); -+ if (ratio > max_cpu_ratio) -+ max_cpu_ratio = ratio; -+ -+ busy_retry = BUSY_RETRY; -+ /* -+ * Do not consider rung_round_finished() here, just used up the -+ * rung->pages_to_scan quota. 
-+ */ -+ while (rung->pages_to_scan && rung->vma_root.num && -+ likely(!freezing(current))) { -+ int reset = 0; -+ -+ slot = rung->current_scan; -+ -+ BUG_ON(vma_fully_scanned(slot)); -+ -+ if (mmsem_batch) -+ err = 0; -+ else -+ err = try_down_read_slot_mmap_sem(slot); -+ -+ if (err == -ENOENT) { -+rm_slot: -+ rung_rm_slot(slot); -+ continue; -+ } -+ -+ busy_mm = slot->mm; -+ -+ if (err == -EBUSY) { -+ /* skip other vmas on the same mm */ -+ do { -+ reset = advance_current_scan(rung); -+ iter = rung->current_scan; -+ busy_retry--; -+ if (iter->vma->vm_mm != busy_mm || -+ !busy_retry || reset) -+ break; -+ } while (1); -+ -+ if (iter->vma->vm_mm != busy_mm) { -+ continue; -+ } else { -+ /* scan round finsished */ -+ break; -+ } -+ } -+ -+ BUG_ON(!vma_can_enter(slot->vma)); -+ if (uksm_test_exit(slot->vma->vm_mm)) { -+ mmsem_batch = 0; -+ up_read(&slot->vma->vm_mm->mmap_sem); -+ goto rm_slot; -+ } -+ -+ if (mmsem_batch) -+ mmsem_batch--; -+ else -+ mmsem_batch = UKSM_MMSEM_BATCH; -+ -+ /* Ok, we have take the mmap_sem, ready to scan */ -+ scan_vma_one_page(slot); -+ rung->pages_to_scan--; -+ vpages++; -+ -+ if (rung->current_offset + rung->step > slot->pages - 1 -+ || vma_fully_scanned(slot)) { -+ up_read(&slot->vma->vm_mm->mmap_sem); -+ judge_slot(slot); -+ mmsem_batch = 0; -+ } else { -+ rung->current_offset += rung->step; -+ if (!mmsem_batch) -+ up_read(&slot->vma->vm_mm->mmap_sem); -+ } -+ -+ busy_retry = BUSY_RETRY; -+ cond_resched(); -+ } -+ -+ if (mmsem_batch) { -+ up_read(&slot->vma->vm_mm->mmap_sem); -+ mmsem_batch = 0; -+ } -+ -+ if (freezing(current)) -+ break; -+ -+ cond_resched(); -+ } -+ end_time = task_sched_runtime(current); -+ delta_exec = end_time - start_time; -+ -+ if (freezing(current)) -+ return; -+ -+ cleanup_vma_slots(); -+ uksm_enter_all_slots(); -+ -+ round_finished = 1; -+ all_rungs_emtpy = 1; -+ for (i = 0; i < SCAN_LADDER_SIZE; i++) { -+ struct scan_rung *rung = &uksm_scan_ladder[i]; -+ -+ if (rung->vma_root.num) { -+ all_rungs_emtpy = 0; -+ if (!rung_round_finished(rung)) -+ round_finished = 0; -+ } -+ } -+ -+ if (all_rungs_emtpy) -+ round_finished = 0; -+ -+ if (round_finished) { -+ round_update_ladder(); -+ uksm_eval_round++; -+ -+ if (hash_round_finished() && rshash_adjust()) { -+ /* Reset the unstable root iff hash strength changed */ -+ uksm_hash_round++; -+ root_unstable_tree = RB_ROOT; -+ free_all_tree_nodes(&unstable_tree_node_list); -+ } -+ -+ /* -+ * A number of pages can hang around indefinitely on per-cpu -+ * pagevecs, raised page count preventing write_protect_page -+ * from merging them. Though it doesn't really matter much, -+ * it is puzzling to see some stuck in pages_volatile until -+ * other activity jostles them out, and they also prevented -+ * LTP's KSM test from succeeding deterministically; so drain -+ * them here (here rather than on entry to uksm_do_scan(), -+ * so we don't IPI too often when pages_to_scan is set low). 
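The bookkeeping applied just below feeds the measured cost of this round back through the ema() helper defined earlier and then derives the next sleep interval from the rung's CPU ratio via scan_time_to_sleep(). A standalone sketch of those two pieces; EMA_ALPHA and TIME_RATIO_SCALE are placeholder values, not the ones used by the patch.

#include <stdio.h>

#define EMA_ALPHA		20UL	/* placeholder weight, in percent */
#define TIME_RATIO_SCALE	1000UL	/* placeholder scale */

/* Exponential moving average of the per-page scan cost, with the same
 * guard against bursts: a 10x jump is approached only by doubling. */
static unsigned long ema(unsigned long curr, unsigned long last)
{
	if (curr > last * 10)
		return last * 2;
	return (EMA_ALPHA * curr + (100 - EMA_ALPHA) * last) / 100;
}

/* scan time (ns) -> minimum sleep (ms) keeping the scanner below
 * ratio/TIME_RATIO_SCALE of one CPU */
static unsigned int scan_time_to_sleep(unsigned long long scan_time_ns,
				       unsigned long ratio)
{
	unsigned long long ms = scan_time_ns >> 20;	/* ~ns to ms, as above */

	return (unsigned int)(ms * (TIME_RATIO_SCALE - ratio) / ratio);
}

int main(void)
{
	unsigned long page_time = 2000;		/* ns, running estimate */

	page_time = ema(2500, page_time);	/* this round cost 2500 ns/page */
	printf("ema page time: %lu ns\n", page_time);

	/* a ~40 ms scan at a 20% CPU budget implies ~160 ms of sleep */
	printf("sleep: %u ms\n", scan_time_to_sleep(40ULL << 20, 200));
	return 0;
}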
-+ */ -+ lru_add_drain_all(); -+ } -+ -+ -+ if (vpages && delta_exec > 0) { -+ pcost = (unsigned long) delta_exec / vpages; -+ if (likely(uksm_ema_page_time)) -+ uksm_ema_page_time = ema(pcost, uksm_ema_page_time); -+ else -+ uksm_ema_page_time = pcost; -+ } -+ -+ uksm_calc_scan_pages(); -+ uksm_sleep_real = uksm_sleep_jiffies; -+ /* in case of radical cpu bursts, apply the upper bound */ -+ end_time = task_sched_runtime(current); -+ if (max_cpu_ratio && end_time > start_time) { -+ scan_time = end_time - start_time; -+ expected_jiffies = msecs_to_jiffies( -+ scan_time_to_sleep(scan_time, max_cpu_ratio)); -+ -+ if (expected_jiffies > uksm_sleep_real) -+ uksm_sleep_real = expected_jiffies; -+ -+ /* We have a 1 second up bound for responsiveness. */ -+ if (jiffies_to_msecs(uksm_sleep_real) > MSEC_PER_SEC) -+ uksm_sleep_real = msecs_to_jiffies(1000); -+ } -+ -+ return; -+} -+ -+static int ksmd_should_run(void) -+{ -+ return uksm_run & UKSM_RUN_MERGE; -+} -+ -+static int uksm_scan_thread(void *nothing) -+{ -+ set_freezable(); -+ set_user_nice(current, 5); -+ -+ while (!kthread_should_stop()) { -+ mutex_lock(&uksm_thread_mutex); -+ if (ksmd_should_run()) -+ uksm_do_scan(); -+ mutex_unlock(&uksm_thread_mutex); -+ -+ try_to_freeze(); -+ -+ if (ksmd_should_run()) { -+ schedule_timeout_interruptible(uksm_sleep_real); -+ uksm_sleep_times++; -+ } else { -+ wait_event_freezable(uksm_thread_wait, -+ ksmd_should_run() || kthread_should_stop()); -+ } -+ } -+ return 0; -+} -+ -+void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc) -+{ -+ struct stable_node *stable_node; -+ struct node_vma *node_vma; -+ struct rmap_item *rmap_item; -+ int search_new_forks = 0; -+ unsigned long address; -+ -+ VM_BUG_ON_PAGE(!PageKsm(page), page); -+ VM_BUG_ON_PAGE(!PageLocked(page), page); -+ -+ stable_node = page_stable_node(page); -+ if (!stable_node) -+ return; -+again: -+ hlist_for_each_entry(node_vma, &stable_node->hlist, hlist) { -+ hlist_for_each_entry(rmap_item, &node_vma->rmap_hlist, hlist) { -+ struct anon_vma *anon_vma = rmap_item->anon_vma; -+ struct anon_vma_chain *vmac; -+ struct vm_area_struct *vma; -+ -+ cond_resched(); -+ anon_vma_lock_read(anon_vma); -+ anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root, -+ 0, ULONG_MAX) { -+ cond_resched(); -+ vma = vmac->vma; -+ address = get_rmap_addr(rmap_item); -+ -+ if (address < vma->vm_start || -+ address >= vma->vm_end) -+ continue; -+ -+ if ((rmap_item->slot->vma == vma) == -+ search_new_forks) -+ continue; -+ -+ if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg)) -+ continue; -+ -+ if (!rwc->rmap_one(page, vma, address, rwc->arg)) { -+ anon_vma_unlock_read(anon_vma); -+ return; -+ } -+ -+ if (rwc->done && rwc->done(page)) { -+ anon_vma_unlock_read(anon_vma); -+ return; -+ } -+ } -+ anon_vma_unlock_read(anon_vma); -+ } -+ } -+ if (!search_new_forks++) -+ goto again; -+} -+ -+#ifdef CONFIG_MIGRATION -+/* Common ksm interface but may be specific to uksm */ -+void ksm_migrate_page(struct page *newpage, struct page *oldpage) -+{ -+ struct stable_node *stable_node; -+ -+ VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage); -+ VM_BUG_ON_PAGE(!PageLocked(newpage), newpage); -+ VM_BUG_ON(newpage->mapping != oldpage->mapping); -+ -+ stable_node = page_stable_node(newpage); -+ if (stable_node) { -+ VM_BUG_ON(stable_node->kpfn != page_to_pfn(oldpage)); -+ stable_node->kpfn = page_to_pfn(newpage); -+ /* -+ * newpage->mapping was set in advance; now we need smp_wmb() -+ * to make sure that the new stable_node->kpfn is visible -+ * to get_ksm_page() before it 
can see that oldpage->mapping -+ * has gone stale (or that PageSwapCache has been cleared). -+ */ -+ smp_wmb(); -+ set_page_stable_node(oldpage, NULL); -+ } -+} -+#endif /* CONFIG_MIGRATION */ -+ -+#ifdef CONFIG_MEMORY_HOTREMOVE -+static struct stable_node *uksm_check_stable_tree(unsigned long start_pfn, -+ unsigned long end_pfn) -+{ -+ struct rb_node *node; -+ -+ for (node = rb_first(root_stable_treep); node; node = rb_next(node)) { -+ struct stable_node *stable_node; -+ -+ stable_node = rb_entry(node, struct stable_node, node); -+ if (stable_node->kpfn >= start_pfn && -+ stable_node->kpfn < end_pfn) -+ return stable_node; -+ } -+ return NULL; -+} -+ -+static int uksm_memory_callback(struct notifier_block *self, -+ unsigned long action, void *arg) -+{ -+ struct memory_notify *mn = arg; -+ struct stable_node *stable_node; -+ -+ switch (action) { -+ case MEM_GOING_OFFLINE: -+ /* -+ * Keep it very simple for now: just lock out ksmd and -+ * MADV_UNMERGEABLE while any memory is going offline. -+ * mutex_lock_nested() is necessary because lockdep was alarmed -+ * that here we take uksm_thread_mutex inside notifier chain -+ * mutex, and later take notifier chain mutex inside -+ * uksm_thread_mutex to unlock it. But that's safe because both -+ * are inside mem_hotplug_mutex. -+ */ -+ mutex_lock_nested(&uksm_thread_mutex, SINGLE_DEPTH_NESTING); -+ break; -+ -+ case MEM_OFFLINE: -+ /* -+ * Most of the work is done by page migration; but there might -+ * be a few stable_nodes left over, still pointing to struct -+ * pages which have been offlined: prune those from the tree. -+ */ -+ while ((stable_node = uksm_check_stable_tree(mn->start_pfn, -+ mn->start_pfn + mn->nr_pages)) != NULL) -+ remove_node_from_stable_tree(stable_node, 1, 1); -+ /* fallthrough */ -+ -+ case MEM_CANCEL_OFFLINE: -+ mutex_unlock(&uksm_thread_mutex); -+ break; -+ } -+ return NOTIFY_OK; -+} -+#endif /* CONFIG_MEMORY_HOTREMOVE */ -+ -+#ifdef CONFIG_SYSFS -+/* -+ * This all compiles without CONFIG_SYSFS, but is a waste of space. 
-+ */ -+ -+#define UKSM_ATTR_RO(_name) \ -+ static struct kobj_attribute _name##_attr = __ATTR_RO(_name) -+#define UKSM_ATTR(_name) \ -+ static struct kobj_attribute _name##_attr = \ -+ __ATTR(_name, 0644, _name##_show, _name##_store) -+ -+static ssize_t max_cpu_percentage_show(struct kobject *kobj, -+ struct kobj_attribute *attr, char *buf) -+{ -+ return sprintf(buf, "%u\n", uksm_max_cpu_percentage); -+} -+ -+static ssize_t max_cpu_percentage_store(struct kobject *kobj, -+ struct kobj_attribute *attr, -+ const char *buf, size_t count) -+{ -+ unsigned long max_cpu_percentage; -+ int err; -+ -+ err = kstrtoul(buf, 10, &max_cpu_percentage); -+ if (err || max_cpu_percentage > 100) -+ return -EINVAL; -+ -+ if (max_cpu_percentage == 100) -+ max_cpu_percentage = 99; -+ else if (max_cpu_percentage < 10) -+ max_cpu_percentage = 10; -+ -+ uksm_max_cpu_percentage = max_cpu_percentage; -+ -+ return count; -+} -+UKSM_ATTR(max_cpu_percentage); -+ -+static ssize_t sleep_millisecs_show(struct kobject *kobj, -+ struct kobj_attribute *attr, char *buf) -+{ -+ return sprintf(buf, "%u\n", jiffies_to_msecs(uksm_sleep_jiffies)); -+} -+ -+static ssize_t sleep_millisecs_store(struct kobject *kobj, -+ struct kobj_attribute *attr, -+ const char *buf, size_t count) -+{ -+ unsigned long msecs; -+ int err; -+ -+ err = kstrtoul(buf, 10, &msecs); -+ if (err || msecs > MSEC_PER_SEC) -+ return -EINVAL; -+ -+ uksm_sleep_jiffies = msecs_to_jiffies(msecs); -+ uksm_sleep_saved = uksm_sleep_jiffies; -+ -+ return count; -+} -+UKSM_ATTR(sleep_millisecs); -+ -+ -+static ssize_t cpu_governor_show(struct kobject *kobj, -+ struct kobj_attribute *attr, char *buf) -+{ -+ int n = sizeof(uksm_cpu_governor_str) / sizeof(char *); -+ int i; -+ -+ buf[0] = '\0'; -+ for (i = 0; i < n ; i++) { -+ if (uksm_cpu_governor == i) -+ strcat(buf, "["); -+ -+ strcat(buf, uksm_cpu_governor_str[i]); -+ -+ if (uksm_cpu_governor == i) -+ strcat(buf, "]"); -+ -+ strcat(buf, " "); -+ } -+ strcat(buf, "\n"); -+ -+ return strlen(buf); -+} -+ -+static inline void init_performance_values(void) -+{ -+ int i; -+ struct scan_rung *rung; -+ struct uksm_cpu_preset_s *preset = uksm_cpu_preset + uksm_cpu_governor; -+ -+ -+ for (i = 0; i < SCAN_LADDER_SIZE; i++) { -+ rung = uksm_scan_ladder + i; -+ rung->cpu_ratio = preset->cpu_ratio[i]; -+ rung->cover_msecs = preset->cover_msecs[i]; -+ } -+ -+ uksm_max_cpu_percentage = preset->max_cpu; -+} -+ -+static ssize_t cpu_governor_store(struct kobject *kobj, -+ struct kobj_attribute *attr, -+ const char *buf, size_t count) -+{ -+ int n = sizeof(uksm_cpu_governor_str) / sizeof(char *); -+ -+ for (n--; n >= 0 ; n--) { -+ if (!strncmp(buf, uksm_cpu_governor_str[n], -+ strlen(uksm_cpu_governor_str[n]))) -+ break; -+ } -+ -+ if (n < 0) -+ return -EINVAL; -+ else -+ uksm_cpu_governor = n; -+ -+ init_performance_values(); -+ -+ return count; -+} -+UKSM_ATTR(cpu_governor); -+ -+static ssize_t run_show(struct kobject *kobj, struct kobj_attribute *attr, -+ char *buf) -+{ -+ return sprintf(buf, "%u\n", uksm_run); -+} -+ -+static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr, -+ const char *buf, size_t count) -+{ -+ int err; -+ unsigned long flags; -+ -+ err = kstrtoul(buf, 10, &flags); -+ if (err || flags > UINT_MAX) -+ return -EINVAL; -+ if (flags > UKSM_RUN_MERGE) -+ return -EINVAL; -+ -+ mutex_lock(&uksm_thread_mutex); -+ if (uksm_run != flags) -+ uksm_run = flags; -+ mutex_unlock(&uksm_thread_mutex); -+ -+ if (flags & UKSM_RUN_MERGE) -+ wake_up_interruptible(&uksm_thread_wait); -+ -+ return count; -+} 
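The cpu_ratios store handler further below accepts, for each rung, either a plain ratio or the form "MAX/<n>", which it records as a negative number meaning "at most 1/n of the configured maximum". A standalone illustration of parsing one such token; TIME_RATIO_SCALE is again a placeholder value.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define TIME_RATIO_SCALE 1000	/* placeholder scale */

/* Returns a positive ratio, a negative "MAX/n" encoding, or 0 if invalid,
 * mirroring how the rungs store cpu_ratio. */
static int parse_ratio(const char *tok)
{
	unsigned long v;

	if (!strncmp(tok, "MAX/", 4)) {
		v = strtoul(tok + 4, NULL, 10);
		if (!v || v > TIME_RATIO_SCALE)
			return 0;
		return -(int)(TIME_RATIO_SCALE / v);
	}

	v = strtoul(tok, NULL, 10);
	return (v && v <= TIME_RATIO_SCALE) ? (int)v : 0;
}

int main(void)
{
	printf("%d %d\n", parse_ratio("200"), parse_ratio("MAX/4"));
	return 0;
}

rung_real_ratio() earlier turns the negative form back into an absolute ratio by scaling it with the global max_cpu_percentage.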
-+UKSM_ATTR(run); -+ -+static ssize_t abundant_threshold_show(struct kobject *kobj, -+ struct kobj_attribute *attr, char *buf) -+{ -+ return sprintf(buf, "%u\n", uksm_abundant_threshold); -+} -+ -+static ssize_t abundant_threshold_store(struct kobject *kobj, -+ struct kobj_attribute *attr, -+ const char *buf, size_t count) -+{ -+ int err; -+ unsigned long flags; -+ -+ err = kstrtoul(buf, 10, &flags); -+ if (err || flags > 99) -+ return -EINVAL; -+ -+ uksm_abundant_threshold = flags; -+ -+ return count; -+} -+UKSM_ATTR(abundant_threshold); -+ -+static ssize_t thrash_threshold_show(struct kobject *kobj, -+ struct kobj_attribute *attr, char *buf) -+{ -+ return sprintf(buf, "%u\n", uksm_thrash_threshold); -+} -+ -+static ssize_t thrash_threshold_store(struct kobject *kobj, -+ struct kobj_attribute *attr, -+ const char *buf, size_t count) -+{ -+ int err; -+ unsigned long flags; -+ -+ err = kstrtoul(buf, 10, &flags); -+ if (err || flags > 99) -+ return -EINVAL; -+ -+ uksm_thrash_threshold = flags; -+ -+ return count; -+} -+UKSM_ATTR(thrash_threshold); -+ -+static ssize_t cpu_ratios_show(struct kobject *kobj, -+ struct kobj_attribute *attr, char *buf) -+{ -+ int i, size; -+ struct scan_rung *rung; -+ char *p = buf; -+ -+ for (i = 0; i < SCAN_LADDER_SIZE; i++) { -+ rung = &uksm_scan_ladder[i]; -+ -+ if (rung->cpu_ratio > 0) -+ size = sprintf(p, "%d ", rung->cpu_ratio); -+ else -+ size = sprintf(p, "MAX/%d ", -+ TIME_RATIO_SCALE / -rung->cpu_ratio); -+ -+ p += size; -+ } -+ -+ *p++ = '\n'; -+ *p = '\0'; -+ -+ return p - buf; -+} -+ -+static ssize_t cpu_ratios_store(struct kobject *kobj, -+ struct kobj_attribute *attr, -+ const char *buf, size_t count) -+{ -+ int i, cpuratios[SCAN_LADDER_SIZE], err; -+ unsigned long value; -+ struct scan_rung *rung; -+ char *p, *end = NULL; -+ -+ p = kzalloc(count, GFP_KERNEL); -+ if (!p) -+ return -ENOMEM; -+ -+ memcpy(p, buf, count); -+ -+ for (i = 0; i < SCAN_LADDER_SIZE; i++) { -+ if (i != SCAN_LADDER_SIZE - 1) { -+ end = strchr(p, ' '); -+ if (!end) -+ return -EINVAL; -+ -+ *end = '\0'; -+ } -+ -+ if (strstr(p, "MAX/")) { -+ p = strchr(p, '/') + 1; -+ err = kstrtoul(p, 10, &value); -+ if (err || value > TIME_RATIO_SCALE || !value) -+ return -EINVAL; -+ -+ cpuratios[i] = -(int) (TIME_RATIO_SCALE / value); -+ } else { -+ err = kstrtoul(p, 10, &value); -+ if (err || value > TIME_RATIO_SCALE || !value) -+ return -EINVAL; -+ -+ cpuratios[i] = value; -+ } -+ -+ p = end + 1; -+ } -+ -+ for (i = 0; i < SCAN_LADDER_SIZE; i++) { -+ rung = &uksm_scan_ladder[i]; -+ -+ rung->cpu_ratio = cpuratios[i]; -+ } -+ -+ return count; -+} -+UKSM_ATTR(cpu_ratios); -+ -+static ssize_t eval_intervals_show(struct kobject *kobj, -+ struct kobj_attribute *attr, char *buf) -+{ -+ int i, size; -+ struct scan_rung *rung; -+ char *p = buf; -+ -+ for (i = 0; i < SCAN_LADDER_SIZE; i++) { -+ rung = &uksm_scan_ladder[i]; -+ size = sprintf(p, "%u ", rung->cover_msecs); -+ p += size; -+ } -+ -+ *p++ = '\n'; -+ *p = '\0'; -+ -+ return p - buf; -+} -+ -+static ssize_t eval_intervals_store(struct kobject *kobj, -+ struct kobj_attribute *attr, -+ const char *buf, size_t count) -+{ -+ int i, err; -+ unsigned long values[SCAN_LADDER_SIZE]; -+ struct scan_rung *rung; -+ char *p, *end = NULL; -+ ssize_t ret = count; -+ -+ p = kzalloc(count + 2, GFP_KERNEL); -+ if (!p) -+ return -ENOMEM; -+ -+ memcpy(p, buf, count); -+ -+ for (i = 0; i < SCAN_LADDER_SIZE; i++) { -+ if (i != SCAN_LADDER_SIZE - 1) { -+ end = strchr(p, ' '); -+ if (!end) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ *end = '\0'; -+ } -+ -+ err = 
kstrtoul(p, 10, &values[i]); -+ if (err) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ p = end + 1; -+ } -+ -+ for (i = 0; i < SCAN_LADDER_SIZE; i++) { -+ rung = &uksm_scan_ladder[i]; -+ -+ rung->cover_msecs = values[i]; -+ } -+ -+out: -+ kfree(p); -+ return ret; -+} -+UKSM_ATTR(eval_intervals); -+ -+static ssize_t ema_per_page_time_show(struct kobject *kobj, -+ struct kobj_attribute *attr, char *buf) -+{ -+ return sprintf(buf, "%lu\n", uksm_ema_page_time); -+} -+UKSM_ATTR_RO(ema_per_page_time); -+ -+static ssize_t pages_shared_show(struct kobject *kobj, -+ struct kobj_attribute *attr, char *buf) -+{ -+ return sprintf(buf, "%lu\n", uksm_pages_shared); -+} -+UKSM_ATTR_RO(pages_shared); -+ -+static ssize_t pages_sharing_show(struct kobject *kobj, -+ struct kobj_attribute *attr, char *buf) -+{ -+ return sprintf(buf, "%lu\n", uksm_pages_sharing); -+} -+UKSM_ATTR_RO(pages_sharing); -+ -+static ssize_t pages_unshared_show(struct kobject *kobj, -+ struct kobj_attribute *attr, char *buf) -+{ -+ return sprintf(buf, "%lu\n", uksm_pages_unshared); -+} -+UKSM_ATTR_RO(pages_unshared); -+ -+static ssize_t full_scans_show(struct kobject *kobj, -+ struct kobj_attribute *attr, char *buf) -+{ -+ return sprintf(buf, "%llu\n", fully_scanned_round); -+} -+UKSM_ATTR_RO(full_scans); -+ -+static ssize_t pages_scanned_show(struct kobject *kobj, -+ struct kobj_attribute *attr, char *buf) -+{ -+ unsigned long base = 0; -+ u64 delta, ret; -+ -+ if (pages_scanned_stored) { -+ base = pages_scanned_base; -+ ret = pages_scanned_stored; -+ delta = uksm_pages_scanned >> base; -+ if (CAN_OVERFLOW_U64(ret, delta)) { -+ ret >>= 1; -+ delta >>= 1; -+ base++; -+ ret += delta; -+ } -+ } else { -+ ret = uksm_pages_scanned; -+ } -+ -+ while (ret > ULONG_MAX) { -+ ret >>= 1; -+ base++; -+ } -+ -+ if (base) -+ return sprintf(buf, "%lu * 2^%lu\n", (unsigned long)ret, base); -+ else -+ return sprintf(buf, "%lu\n", (unsigned long)ret); -+} -+UKSM_ATTR_RO(pages_scanned); -+ -+static ssize_t hash_strength_show(struct kobject *kobj, -+ struct kobj_attribute *attr, char *buf) -+{ -+ return sprintf(buf, "%lu\n", hash_strength); -+} -+UKSM_ATTR_RO(hash_strength); -+ -+static ssize_t sleep_times_show(struct kobject *kobj, -+ struct kobj_attribute *attr, char *buf) -+{ -+ return sprintf(buf, "%llu\n", uksm_sleep_times); -+} -+UKSM_ATTR_RO(sleep_times); -+ -+ -+static struct attribute *uksm_attrs[] = { -+ &max_cpu_percentage_attr.attr, -+ &sleep_millisecs_attr.attr, -+ &cpu_governor_attr.attr, -+ &run_attr.attr, -+ &ema_per_page_time_attr.attr, -+ &pages_shared_attr.attr, -+ &pages_sharing_attr.attr, -+ &pages_unshared_attr.attr, -+ &full_scans_attr.attr, -+ &pages_scanned_attr.attr, -+ &hash_strength_attr.attr, -+ &sleep_times_attr.attr, -+ &thrash_threshold_attr.attr, -+ &abundant_threshold_attr.attr, -+ &cpu_ratios_attr.attr, -+ &eval_intervals_attr.attr, -+ NULL, -+}; -+ -+static struct attribute_group uksm_attr_group = { -+ .attrs = uksm_attrs, -+ .name = "uksm", -+}; -+#endif /* CONFIG_SYSFS */ -+ -+static inline void init_scan_ladder(void) -+{ -+ int i; -+ struct scan_rung *rung; -+ -+ for (i = 0; i < SCAN_LADDER_SIZE; i++) { -+ rung = uksm_scan_ladder + i; -+ slot_tree_init_root(&rung->vma_root); -+ } -+ -+ init_performance_values(); -+ uksm_calc_scan_pages(); -+} -+ -+static inline int cal_positive_negative_costs(void) -+{ -+ struct page *p1, *p2; -+ unsigned char *addr1, *addr2; -+ unsigned long i, time_start, hash_cost; -+ unsigned long loopnum = 0; -+ -+ /*IMPORTANT: volatile is needed to prevent over-optimization by gcc. 
*/ -+ volatile u32 hash; -+ volatile int ret; -+ -+ p1 = alloc_page(GFP_KERNEL); -+ if (!p1) -+ return -ENOMEM; -+ -+ p2 = alloc_page(GFP_KERNEL); -+ if (!p2) -+ return -ENOMEM; -+ -+ addr1 = kmap_atomic(p1); -+ addr2 = kmap_atomic(p2); -+ memset(addr1, prandom_u32(), PAGE_SIZE); -+ memcpy(addr2, addr1, PAGE_SIZE); -+ -+ /* make sure that the two pages differ in last byte */ -+ addr2[PAGE_SIZE-1] = ~addr2[PAGE_SIZE-1]; -+ kunmap_atomic(addr2); -+ kunmap_atomic(addr1); -+ -+ time_start = jiffies; -+ while (jiffies - time_start < 100) { -+ for (i = 0; i < 100; i++) -+ hash = page_hash(p1, HASH_STRENGTH_FULL, 0); -+ loopnum += 100; -+ } -+ hash_cost = (jiffies - time_start); -+ -+ time_start = jiffies; -+ for (i = 0; i < loopnum; i++) -+ ret = pages_identical_with_cost(p1, p2); -+ memcmp_cost = HASH_STRENGTH_FULL * (jiffies - time_start); -+ memcmp_cost /= hash_cost; -+ pr_info("UKSM: relative memcmp_cost = %lu " -+ "hash=%u cmp_ret=%d.\n", -+ memcmp_cost, hash, ret); -+ -+ __free_page(p1); -+ __free_page(p2); -+ return 0; -+} -+ -+static int init_zeropage_hash_table(void) -+{ -+ struct page *page; -+ char *addr; -+ int i; -+ -+ page = alloc_page(GFP_KERNEL); -+ if (!page) -+ return -ENOMEM; -+ -+ addr = kmap_atomic(page); -+ memset(addr, 0, PAGE_SIZE); -+ kunmap_atomic(addr); -+ -+ zero_hash_table = kmalloc_array(HASH_STRENGTH_MAX, sizeof(u32), -+ GFP_KERNEL); -+ if (!zero_hash_table) -+ return -ENOMEM; -+ -+ for (i = 0; i < HASH_STRENGTH_MAX; i++) -+ zero_hash_table[i] = page_hash(page, i, 0); -+ -+ __free_page(page); -+ -+ return 0; -+} -+ -+static inline int init_random_sampling(void) -+{ -+ unsigned long i; -+ -+ random_nums = kmalloc(PAGE_SIZE, GFP_KERNEL); -+ if (!random_nums) -+ return -ENOMEM; -+ -+ for (i = 0; i < HASH_STRENGTH_FULL; i++) -+ random_nums[i] = i; -+ -+ for (i = 0; i < HASH_STRENGTH_FULL; i++) { -+ unsigned long rand_range, swap_index, tmp; -+ -+ rand_range = HASH_STRENGTH_FULL - i; -+ swap_index = i + prandom_u32() % rand_range; -+ tmp = random_nums[i]; -+ random_nums[i] = random_nums[swap_index]; -+ random_nums[swap_index] = tmp; -+ } -+ -+ rshash_state.state = RSHASH_NEW; -+ rshash_state.below_count = 0; -+ rshash_state.lookup_window_index = 0; -+ -+ return cal_positive_negative_costs(); -+} -+ -+static int __init uksm_slab_init(void) -+{ -+ rmap_item_cache = UKSM_KMEM_CACHE(rmap_item, 0); -+ if (!rmap_item_cache) -+ goto out; -+ -+ stable_node_cache = UKSM_KMEM_CACHE(stable_node, 0); -+ if (!stable_node_cache) -+ goto out_free1; -+ -+ node_vma_cache = UKSM_KMEM_CACHE(node_vma, 0); -+ if (!node_vma_cache) -+ goto out_free2; -+ -+ vma_slot_cache = UKSM_KMEM_CACHE(vma_slot, 0); -+ if (!vma_slot_cache) -+ goto out_free3; -+ -+ tree_node_cache = UKSM_KMEM_CACHE(tree_node, 0); -+ if (!tree_node_cache) -+ goto out_free4; -+ -+ return 0; -+ -+out_free4: -+ kmem_cache_destroy(vma_slot_cache); -+out_free3: -+ kmem_cache_destroy(node_vma_cache); -+out_free2: -+ kmem_cache_destroy(stable_node_cache); -+out_free1: -+ kmem_cache_destroy(rmap_item_cache); -+out: -+ return -ENOMEM; -+} -+ -+static void __init uksm_slab_free(void) -+{ -+ kmem_cache_destroy(stable_node_cache); -+ kmem_cache_destroy(rmap_item_cache); -+ kmem_cache_destroy(node_vma_cache); -+ kmem_cache_destroy(vma_slot_cache); -+ kmem_cache_destroy(tree_node_cache); -+} -+ -+/* Common interface to ksm, different to it. 
*/ -+int ksm_madvise(struct vm_area_struct *vma, unsigned long start, -+ unsigned long end, int advice, unsigned long *vm_flags) -+{ -+ int err; -+ -+ switch (advice) { -+ case MADV_MERGEABLE: -+ return 0; /* just ignore the advice */ -+ -+ case MADV_UNMERGEABLE: -+ if (!(*vm_flags & VM_MERGEABLE) || !uksm_flags_can_scan(*vm_flags)) -+ return 0; /* just ignore the advice */ -+ -+ if (vma->anon_vma) { -+ err = unmerge_uksm_pages(vma, start, end); -+ if (err) -+ return err; -+ } -+ -+ uksm_remove_vma(vma); -+ *vm_flags &= ~VM_MERGEABLE; -+ break; -+ } -+ -+ return 0; -+} -+ -+/* Common interface to ksm, actually the same. */ -+struct page *ksm_might_need_to_copy(struct page *page, -+ struct vm_area_struct *vma, unsigned long address) -+{ -+ struct anon_vma *anon_vma = page_anon_vma(page); -+ struct page *new_page; -+ -+ if (PageKsm(page)) { -+ if (page_stable_node(page)) -+ return page; /* no need to copy it */ -+ } else if (!anon_vma) { -+ return page; /* no need to copy it */ -+ } else if (anon_vma->root == vma->anon_vma->root && -+ page->index == linear_page_index(vma, address)) { -+ return page; /* still no need to copy it */ -+ } -+ if (!PageUptodate(page)) -+ return page; /* let do_swap_page report the error */ -+ -+ new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); -+ if (new_page) { -+ copy_user_highpage(new_page, page, address, vma); -+ -+ SetPageDirty(new_page); -+ __SetPageUptodate(new_page); -+ __SetPageLocked(new_page); -+ } -+ -+ return new_page; -+} -+ -+/* Copied from mm/ksm.c and required from 5.1 */ -+bool reuse_ksm_page(struct page *page, -+ struct vm_area_struct *vma, -+ unsigned long address) -+{ -+#ifdef CONFIG_DEBUG_VM -+ if (WARN_ON(is_zero_pfn(page_to_pfn(page))) || -+ WARN_ON(!page_mapped(page)) || -+ WARN_ON(!PageLocked(page))) { -+ dump_page(page, "reuse_ksm_page"); -+ return false; -+ } -+#endif -+ -+ if (PageSwapCache(page) || !page_stable_node(page)) -+ return false; -+ /* Prohibit parallel get_ksm_page() */ -+ if (!page_ref_freeze(page, 1)) -+ return false; -+ -+ page_move_anon_rmap(page, vma); -+ page->index = linear_page_index(vma, address); -+ page_ref_unfreeze(page, 1); -+ -+ return true; -+} -+ -+static int __init uksm_init(void) -+{ -+ struct task_struct *uksm_thread; -+ int err; -+ -+ uksm_sleep_jiffies = msecs_to_jiffies(100); -+ uksm_sleep_saved = uksm_sleep_jiffies; -+ -+ slot_tree_init(); -+ init_scan_ladder(); -+ -+ -+ err = init_random_sampling(); -+ if (err) -+ goto out_free2; -+ -+ err = uksm_slab_init(); -+ if (err) -+ goto out_free1; -+ -+ err = init_zeropage_hash_table(); -+ if (err) -+ goto out_free0; -+ -+ uksm_thread = kthread_run(uksm_scan_thread, NULL, "uksmd"); -+ if (IS_ERR(uksm_thread)) { -+ pr_err("uksm: creating kthread failed\n"); -+ err = PTR_ERR(uksm_thread); -+ goto out_free; -+ } -+ -+#ifdef CONFIG_SYSFS -+ err = sysfs_create_group(mm_kobj, &uksm_attr_group); -+ if (err) { -+ pr_err("uksm: register sysfs failed\n"); -+ kthread_stop(uksm_thread); -+ goto out_free; -+ } -+#else -+ uksm_run = UKSM_RUN_MERGE; /* no way for user to start it */ -+ -+#endif /* CONFIG_SYSFS */ -+ -+#ifdef CONFIG_MEMORY_HOTREMOVE -+ /* -+ * Choose a high priority since the callback takes uksm_thread_mutex: -+ * later callbacks could only be taking locks which nest within that. 
-+ */ -+ hotplug_memory_notifier(uksm_memory_callback, 100); -+#endif -+ return 0; -+ -+out_free: -+ kfree(zero_hash_table); -+out_free0: -+ uksm_slab_free(); -+out_free1: -+ kfree(random_nums); -+out_free2: -+ kfree(uksm_scan_ladder); -+ return err; -+} -+ -+#ifdef MODULE -+subsys_initcall(ksm_init); -+#else -+late_initcall(uksm_init); -+#endif -+ -diff --git a/mm/vmstat.c b/mm/vmstat.c -index a8222041bd44..7058e8322cbd 100644 ---- a/mm/vmstat.c -+++ b/mm/vmstat.c -@@ -1168,6 +1168,9 @@ const char * const vmstat_text[] = { - "nr_written", - "nr_kernel_misc_reclaimable", - -+#ifdef CONFIG_UKSM -+ "nr_uksm_zero_pages", -+#endif - /* enum writeback_stat_item counters */ - "nr_dirty_threshold", - "nr_dirty_background_threshold", diff --git a/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/4501_muqss.patch b/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/4501_muqss.patch deleted file mode 100644 index 217737ab9..000000000 --- a/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/4501_muqss.patch +++ /dev/null @@ -1,10814 +0,0 @@ -# Calculate format=diff merge(sys-kernel/calculate-sources[muqss])!= -diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index 8dee8f68fe15..e56fb275f607 100644 ---- a/Documentation/admin-guide/kernel-parameters.txt -+++ b/Documentation/admin-guide/kernel-parameters.txt -@@ -4277,6 +4277,14 @@ - Memory area to be used by remote processor image, - managed by CMA. - -+ rqshare= [X86] Select the MuQSS scheduler runqueue sharing type. -+ Format: -+ smt -- Share SMT (hyperthread) sibling runqueues -+ mc -- Share MC (multicore) sibling runqueues -+ smp -- Share SMP runqueues -+ none -- So not share any runqueues -+ Default value is mc -+ - rw [KNL] Mount root device read-write on boot - - S [KNL] Run init in single mode -diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst -index 032c7cd3cede..ff41dfacb34b 100644 ---- a/Documentation/admin-guide/sysctl/kernel.rst -+++ b/Documentation/admin-guide/sysctl/kernel.rst -@@ -46,6 +46,7 @@ show up in /proc/sys/kernel: - - hung_task_check_interval_secs - - hung_task_warnings - - hyperv_record_panic_msg -+- iso_cpu - - kexec_load_disabled - - kptr_restrict - - l2cr [ PPC only ] -@@ -82,6 +83,7 @@ show up in /proc/sys/kernel: - - randomize_va_space - - real-root-dev ==> Documentation/admin-guide/initrd.rst - - reboot-cmd [ SPARC only ] -+- rr_interval - - rtsig-max - - rtsig-nr - - sched_energy_aware -@@ -105,6 +107,7 @@ show up in /proc/sys/kernel: - - unknown_nmi_panic - - watchdog - - watchdog_thresh -+- yield_type - - version - - -@@ -438,6 +441,16 @@ When kptr_restrict is set to (2), kernel pointers printed using - %pK will be replaced with 0's regardless of privileges. - - -+iso_cpu: (MuQSS CPU scheduler only) -+=================================== -+ -+This sets the percentage cpu that the unprivileged SCHED_ISO tasks can -+run effectively at realtime priority, averaged over a rolling five -+seconds over the -whole- system, meaning all cpus. -+ -+Set to 70 (percent) by default. -+ -+ - l2cr: (PPC only) - ================ - -@@ -905,6 +918,20 @@ ROM/Flash boot loader. Maybe to tell it what to do after - rebooting. ??? - - -+rr_interval: (MuQSS CPU scheduler only) -+======================================= -+ -+This is the smallest duration that any cpu process scheduling unit -+will run for. 
Increasing this value can increase throughput of cpu -+bound tasks substantially but at the expense of increased latencies -+overall. Conversely decreasing it will decrease average and maximum -+latencies but at the expense of throughput. This value is in -+milliseconds and the default value chosen depends on the number of -+cpus available at scheduler initialisation with a minimum of 6. -+ -+Valid values are from 1-1000. -+ -+ - rtsig-max & rtsig-nr: - ===================== - -@@ -1175,3 +1202,13 @@ is 10 seconds. - - The softlockup threshold is (2 * watchdog_thresh). Setting this - tunable to zero will disable lockup detection altogether. -+ -+ -+yield_type: (MuQSS CPU scheduler only) -+====================================== -+ -+This determines what type of yield calls to sched_yield will perform. -+ -+ 0: No yield. -+ 1: Yield only to better priority/deadline tasks. (default) -+ 2: Expire timeslice and recalculate deadline. -diff --git a/Documentation/scheduler/sched-BFS.txt b/Documentation/scheduler/sched-BFS.txt -new file mode 100644 -index 000000000000..c0282002a079 ---- /dev/null -+++ b/Documentation/scheduler/sched-BFS.txt -@@ -0,0 +1,351 @@ -+BFS - The Brain Fuck Scheduler by Con Kolivas. -+ -+Goals. -+ -+The goal of the Brain Fuck Scheduler, referred to as BFS from here on, is to -+completely do away with the complex designs of the past for the cpu process -+scheduler and instead implement one that is very simple in basic design. -+The main focus of BFS is to achieve excellent desktop interactivity and -+responsiveness without heuristics and tuning knobs that are difficult to -+understand, impossible to model and predict the effect of, and when tuned to -+one workload cause massive detriment to another. -+ -+ -+Design summary. -+ -+BFS is best described as a single runqueue, O(n) lookup, earliest effective -+virtual deadline first design, loosely based on EEVDF (earliest eligible virtual -+deadline first) and my previous Staircase Deadline scheduler. Each component -+shall be described in order to understand the significance of, and reasoning for -+it. The codebase when the first stable version was released was approximately -+9000 lines less code than the existing mainline linux kernel scheduler (in -+2.6.31). This does not even take into account the removal of documentation and -+the cgroups code that is not used. -+ -+Design reasoning. -+ -+The single runqueue refers to the queued but not running processes for the -+entire system, regardless of the number of CPUs. The reason for going back to -+a single runqueue design is that once multiple runqueues are introduced, -+per-CPU or otherwise, there will be complex interactions as each runqueue will -+be responsible for the scheduling latency and fairness of the tasks only on its -+own runqueue, and to achieve fairness and low latency across multiple CPUs, any -+advantage in throughput of having CPU local tasks causes other disadvantages. -+This is due to requiring a very complex balancing system to at best achieve some -+semblance of fairness across CPUs and can only maintain relatively low latency -+for tasks bound to the same CPUs, not across them. To increase said fairness -+and latency across CPUs, the advantage of local runqueue locking, which makes -+for better scalability, is lost due to having to grab multiple locks. -+ -+A significant feature of BFS is that all accounting is done purely based on CPU -+used and nowhere is sleep time used in any way to determine entitlement or -+interactivity. 
Interactivity "estimators" that use some kind of sleep/run -+algorithm are doomed to fail to detect all interactive tasks, and to falsely tag -+tasks that aren't interactive as being so. The reason for this is that it is -+close to impossible to determine that when a task is sleeping, whether it is -+doing it voluntarily, as in a userspace application waiting for input in the -+form of a mouse click or otherwise, or involuntarily, because it is waiting for -+another thread, process, I/O, kernel activity or whatever. Thus, such an -+estimator will introduce corner cases, and more heuristics will be required to -+cope with those corner cases, introducing more corner cases and failed -+interactivity detection and so on. Interactivity in BFS is built into the design -+by virtue of the fact that tasks that are waking up have not used up their quota -+of CPU time, and have earlier effective deadlines, thereby making it very likely -+they will preempt any CPU bound task of equivalent nice level. See below for -+more information on the virtual deadline mechanism. Even if they do not preempt -+a running task, because the rr interval is guaranteed to have a bound upper -+limit on how long a task will wait for, it will be scheduled within a timeframe -+that will not cause visible interface jitter. -+ -+ -+Design details. -+ -+Task insertion. -+ -+BFS inserts tasks into each relevant queue as an O(1) insertion into a double -+linked list. On insertion, *every* running queue is checked to see if the newly -+queued task can run on any idle queue, or preempt the lowest running task on the -+system. This is how the cross-CPU scheduling of BFS achieves significantly lower -+latency per extra CPU the system has. In this case the lookup is, in the worst -+case scenario, O(n) where n is the number of CPUs on the system. -+ -+Data protection. -+ -+BFS has one single lock protecting the process local data of every task in the -+global queue. Thus every insertion, removal and modification of task data in the -+global runqueue needs to grab the global lock. However, once a task is taken by -+a CPU, the CPU has its own local data copy of the running process' accounting -+information which only that CPU accesses and modifies (such as during a -+timer tick) thus allowing the accounting data to be updated lockless. Once a -+CPU has taken a task to run, it removes it from the global queue. Thus the -+global queue only ever has, at most, -+ -+ (number of tasks requesting cpu time) - (number of logical CPUs) + 1 -+ -+tasks in the global queue. This value is relevant for the time taken to look up -+tasks during scheduling. This will increase if many tasks with CPU affinity set -+in their policy to limit which CPUs they're allowed to run on if they outnumber -+the number of CPUs. The +1 is because when rescheduling a task, the CPU's -+currently running task is put back on the queue. Lookup will be described after -+the virtual deadline mechanism is explained. -+ -+Virtual deadline. -+ -+The key to achieving low latency, scheduling fairness, and "nice level" -+distribution in BFS is entirely in the virtual deadline mechanism. The one -+tunable in BFS is the rr_interval, or "round robin interval". This is the -+maximum time two SCHED_OTHER (or SCHED_NORMAL, the common scheduling policy) -+tasks of the same nice level will be running for, or looking at it the other -+way around, the longest duration two tasks of the same nice level will be -+delayed for. 
When a task requests cpu time, it is given a quota (time_slice) -+equal to the rr_interval and a virtual deadline. The virtual deadline is -+offset from the current time in jiffies by this equation: -+ -+ jiffies + (prio_ratio * rr_interval) -+ -+The prio_ratio is determined as a ratio compared to the baseline of nice -20 -+and increases by 10% per nice level. The deadline is a virtual one only in that -+no guarantee is placed that a task will actually be scheduled by this time, but -+it is used to compare which task should go next. There are three components to -+how a task is next chosen. First is time_slice expiration. If a task runs out -+of its time_slice, it is descheduled, the time_slice is refilled, and the -+deadline reset to that formula above. Second is sleep, where a task no longer -+is requesting CPU for whatever reason. The time_slice and deadline are _not_ -+adjusted in this case and are just carried over for when the task is next -+scheduled. Third is preemption, and that is when a newly waking task is deemed -+higher priority than a currently running task on any cpu by virtue of the fact -+that it has an earlier virtual deadline than the currently running task. The -+earlier deadline is the key to which task is next chosen for the first and -+second cases. Once a task is descheduled, it is put back on the queue, and an -+O(n) lookup of all queued-but-not-running tasks is done to determine which has -+the earliest deadline and that task is chosen to receive CPU next. -+ -+The CPU proportion of different nice tasks works out to be approximately the -+ -+ (prio_ratio difference)^2 -+ -+The reason it is squared is that a task's deadline does not change while it is -+running unless it runs out of time_slice. Thus, even if the time actually -+passes the deadline of another task that is queued, it will not get CPU time -+unless the current running task deschedules, and the time "base" (jiffies) is -+constantly moving. -+ -+Task lookup. -+ -+BFS has 103 priority queues. 100 of these are dedicated to the static priority -+of realtime tasks, and the remaining 3 are, in order of best to worst priority, -+SCHED_ISO (isochronous), SCHED_NORMAL, and SCHED_IDLEPRIO (idle priority -+scheduling). When a task of these priorities is queued, a bitmap of running -+priorities is set showing which of these priorities has tasks waiting for CPU -+time. When a CPU is made to reschedule, the lookup for the next task to get -+CPU time is performed in the following way: -+ -+First the bitmap is checked to see what static priority tasks are queued. If -+any realtime priorities are found, the corresponding queue is checked and the -+first task listed there is taken (provided CPU affinity is suitable) and lookup -+is complete. If the priority corresponds to a SCHED_ISO task, they are also -+taken in FIFO order (as they behave like SCHED_RR). If the priority corresponds -+to either SCHED_NORMAL or SCHED_IDLEPRIO, then the lookup becomes O(n). At this -+stage, every task in the runlist that corresponds to that priority is checked -+to see which has the earliest set deadline, and (provided it has suitable CPU -+affinity) it is taken off the runqueue and given the CPU. If a task has an -+expired deadline, it is taken and the rest of the lookup aborted (as they are -+chosen in FIFO order). -+ -+Thus, the lookup is O(n) in the worst case only, where n is as described -+earlier, as tasks may be chosen before the whole task list is looked over. -+ -+ -+Scalability. 
-+ -+The major limitations of BFS will be that of scalability, as the separate -+runqueue designs will have less lock contention as the number of CPUs rises. -+However they do not scale linearly even with separate runqueues as multiple -+runqueues will need to be locked concurrently on such designs to be able to -+achieve fair CPU balancing, to try and achieve some sort of nice-level fairness -+across CPUs, and to achieve low enough latency for tasks on a busy CPU when -+other CPUs would be more suited. BFS has the advantage that it requires no -+balancing algorithm whatsoever, as balancing occurs by proxy simply because -+all CPUs draw off the global runqueue, in priority and deadline order. Despite -+the fact that scalability is _not_ the prime concern of BFS, it both shows very -+good scalability to smaller numbers of CPUs and is likely a more scalable design -+at these numbers of CPUs. -+ -+It also has some very low overhead scalability features built into the design -+when it has been deemed their overhead is so marginal that they're worth adding. -+The first is the local copy of the running process' data to the CPU it's running -+on to allow that data to be updated lockless where possible. Then there is -+deference paid to the last CPU a task was running on, by trying that CPU first -+when looking for an idle CPU to use the next time it's scheduled. Finally there -+is the notion of cache locality beyond the last running CPU. The sched_domains -+information is used to determine the relative virtual "cache distance" that -+other CPUs have from the last CPU a task was running on. CPUs with shared -+caches, such as SMT siblings, or multicore CPUs with shared caches, are treated -+as cache local. CPUs without shared caches are treated as not cache local, and -+CPUs on different NUMA nodes are treated as very distant. This "relative cache -+distance" is used by modifying the virtual deadline value when doing lookups. -+Effectively, the deadline is unaltered between "cache local" CPUs, doubled for -+"cache distant" CPUs, and quadrupled for "very distant" CPUs. The reasoning -+behind the doubling of deadlines is as follows. The real cost of migrating a -+task from one CPU to another is entirely dependant on the cache footprint of -+the task, how cache intensive the task is, how long it's been running on that -+CPU to take up the bulk of its cache, how big the CPU cache is, how fast and -+how layered the CPU cache is, how fast a context switch is... and so on. In -+other words, it's close to random in the real world where we do more than just -+one sole workload. The only thing we can be sure of is that it's not free. So -+BFS uses the principle that an idle CPU is a wasted CPU and utilising idle CPUs -+is more important than cache locality, and cache locality only plays a part -+after that. Doubling the effective deadline is based on the premise that the -+"cache local" CPUs will tend to work on the same tasks up to double the number -+of cache local CPUs, and once the workload is beyond that amount, it is likely -+that none of the tasks are cache warm anywhere anyway. The quadrupling for NUMA -+is a value I pulled out of my arse. -+ -+When choosing an idle CPU for a waking task, the cache locality is determined -+according to where the task last ran and then idle CPUs are ranked from best -+to worst to choose the most suitable idle CPU based on cache locality, NUMA -+node locality and hyperthread sibling business. 
They are chosen in the -+following preference (if idle): -+ -+* Same core, idle or busy cache, idle threads -+* Other core, same cache, idle or busy cache, idle threads. -+* Same node, other CPU, idle cache, idle threads. -+* Same node, other CPU, busy cache, idle threads. -+* Same core, busy threads. -+* Other core, same cache, busy threads. -+* Same node, other CPU, busy threads. -+* Other node, other CPU, idle cache, idle threads. -+* Other node, other CPU, busy cache, idle threads. -+* Other node, other CPU, busy threads. -+ -+This shows the SMT or "hyperthread" awareness in the design as well which will -+choose a real idle core first before a logical SMT sibling which already has -+tasks on the physical CPU. -+ -+Early benchmarking of BFS suggested scalability dropped off at the 16 CPU mark. -+However this benchmarking was performed on an earlier design that was far less -+scalable than the current one so it's hard to know how scalable it is in terms -+of both CPUs (due to the global runqueue) and heavily loaded machines (due to -+O(n) lookup) at this stage. Note that in terms of scalability, the number of -+_logical_ CPUs matters, not the number of _physical_ CPUs. Thus, a dual (2x) -+quad core (4X) hyperthreaded (2X) machine is effectively a 16X. Newer benchmark -+results are very promising indeed, without needing to tweak any knobs, features -+or options. Benchmark contributions are most welcome. -+ -+ -+Features -+ -+As the initial prime target audience for BFS was the average desktop user, it -+was designed to not need tweaking, tuning or have features set to obtain benefit -+from it. Thus the number of knobs and features has been kept to an absolute -+minimum and should not require extra user input for the vast majority of cases. -+There are precisely 2 tunables, and 2 extra scheduling policies. The rr_interval -+and iso_cpu tunables, and the SCHED_ISO and SCHED_IDLEPRIO policies. In addition -+to this, BFS also uses sub-tick accounting. What BFS does _not_ now feature is -+support for CGROUPS. The average user should neither need to know what these -+are, nor should they need to be using them to have good desktop behaviour. -+ -+rr_interval -+ -+There is only one "scheduler" tunable, the round robin interval. This can be -+accessed in -+ -+ /proc/sys/kernel/rr_interval -+ -+The value is in milliseconds, and the default value is set to 6 on a -+uniprocessor machine, and automatically set to a progressively higher value on -+multiprocessor machines. The reasoning behind increasing the value on more CPUs -+is that the effective latency is decreased by virtue of there being more CPUs on -+BFS (for reasons explained above), and increasing the value allows for less -+cache contention and more throughput. Valid values are from 1 to 1000 -+Decreasing the value will decrease latencies at the cost of decreasing -+throughput, while increasing it will improve throughput, but at the cost of -+worsening latencies. The accuracy of the rr interval is limited by HZ resolution -+of the kernel configuration. Thus, the worst case latencies are usually slightly -+higher than this actual value. The default value of 6 is not an arbitrary one. -+It is based on the fact that humans can detect jitter at approximately 7ms, so -+aiming for much lower latencies is pointless under most circumstances. It is -+worth noting this fact when comparing the latency performance of BFS to other -+schedulers. 
Worst case latencies being higher than 7ms are far worse than -+average latencies not being in the microsecond range. -+ -+Isochronous scheduling. -+ -+Isochronous scheduling is a unique scheduling policy designed to provide -+near-real-time performance to unprivileged (ie non-root) users without the -+ability to starve the machine indefinitely. Isochronous tasks (which means -+"same time") are set using, for example, the schedtool application like so: -+ -+ schedtool -I -e amarok -+ -+This will start the audio application "amarok" as SCHED_ISO. How SCHED_ISO works -+is that it has a priority level between true realtime tasks and SCHED_NORMAL -+which would allow them to preempt all normal tasks, in a SCHED_RR fashion (ie, -+if multiple SCHED_ISO tasks are running, they purely round robin at rr_interval -+rate). However if ISO tasks run for more than a tunable finite amount of time, -+they are then demoted back to SCHED_NORMAL scheduling. This finite amount of -+time is the percentage of _total CPU_ available across the machine, configurable -+as a percentage in the following "resource handling" tunable (as opposed to a -+scheduler tunable): -+ -+ /proc/sys/kernel/iso_cpu -+ -+and is set to 70% by default. It is calculated over a rolling 5 second average -+Because it is the total CPU available, it means that on a multi CPU machine, it -+is possible to have an ISO task running as realtime scheduling indefinitely on -+just one CPU, as the other CPUs will be available. Setting this to 100 is the -+equivalent of giving all users SCHED_RR access and setting it to 0 removes the -+ability to run any pseudo-realtime tasks. -+ -+A feature of BFS is that it detects when an application tries to obtain a -+realtime policy (SCHED_RR or SCHED_FIFO) and the caller does not have the -+appropriate privileges to use those policies. When it detects this, it will -+give the task SCHED_ISO policy instead. Thus it is transparent to the user. -+Because some applications constantly set their policy as well as their nice -+level, there is potential for them to undo the override specified by the user -+on the command line of setting the policy to SCHED_ISO. To counter this, once -+a task has been set to SCHED_ISO policy, it needs superuser privileges to set -+it back to SCHED_NORMAL. This will ensure the task remains ISO and all child -+processes and threads will also inherit the ISO policy. -+ -+Idleprio scheduling. -+ -+Idleprio scheduling is a scheduling policy designed to give out CPU to a task -+_only_ when the CPU would be otherwise idle. The idea behind this is to allow -+ultra low priority tasks to be run in the background that have virtually no -+effect on the foreground tasks. This is ideally suited to distributed computing -+clients (like setiathome, folding, mprime etc) but can also be used to start -+a video encode or so on without any slowdown of other tasks. To avoid this -+policy from grabbing shared resources and holding them indefinitely, if it -+detects a state where the task is waiting on I/O, the machine is about to -+suspend to ram and so on, it will transiently schedule them as SCHED_NORMAL. As -+per the Isochronous task management, once a task has been scheduled as IDLEPRIO, -+it cannot be put back to SCHED_NORMAL without superuser privileges. Tasks can -+be set to start as SCHED_IDLEPRIO with the schedtool command like so: -+ -+ schedtool -D -e ./mprime -+ -+Subtick accounting. 
-+ -+It is surprisingly difficult to get accurate CPU accounting, and in many cases, -+the accounting is done by simply determining what is happening at the precise -+moment a timer tick fires off. This becomes increasingly inaccurate as the -+timer tick frequency (HZ) is lowered. It is possible to create an application -+which uses almost 100% CPU, yet by being descheduled at the right time, records -+zero CPU usage. While the main problem with this is that there are possible -+security implications, it is also difficult to determine how much CPU a task -+really does use. BFS tries to use the sub-tick accounting from the TSC clock, -+where possible, to determine real CPU usage. This is not entirely reliable, but -+is far more likely to produce accurate CPU usage data than the existing designs -+and will not show tasks as consuming no CPU usage when they actually are. Thus, -+the amount of CPU reported as being used by BFS will more accurately represent -+how much CPU the task itself is using (as is shown for example by the 'time' -+application), so the reported values may be quite different to other schedulers. -+Values reported as the 'load' are more prone to problems with this design, but -+per process values are closer to real usage. When comparing throughput of BFS -+to other designs, it is important to compare the actual completed work in terms -+of total wall clock time taken and total work done, rather than the reported -+"cpu usage". -+ -+ -+Con Kolivas Fri Aug 27 2010 -diff --git a/Documentation/scheduler/sched-MuQSS.txt b/Documentation/scheduler/sched-MuQSS.txt -new file mode 100644 -index 000000000000..ae28b85c9995 ---- /dev/null -+++ b/Documentation/scheduler/sched-MuQSS.txt -@@ -0,0 +1,373 @@ -+MuQSS - The Multiple Queue Skiplist Scheduler by Con Kolivas. -+ -+MuQSS is a per-cpu runqueue variant of the original BFS scheduler with -+one 8 level skiplist per runqueue, and fine grained locking for much more -+scalability. -+ -+ -+Goals. -+ -+The goal of the Multiple Queue Skiplist Scheduler, referred to as MuQSS from -+here on (pronounced mux) is to completely do away with the complex designs of -+the past for the cpu process scheduler and instead implement one that is very -+simple in basic design. The main focus of MuQSS is to achieve excellent desktop -+interactivity and responsiveness without heuristics and tuning knobs that are -+difficult to understand, impossible to model and predict the effect of, and when -+tuned to one workload cause massive detriment to another, while still being -+scalable to many CPUs and processes. -+ -+ -+Design summary. -+ -+MuQSS is best described as per-cpu multiple runqueue, O(log n) insertion, O(1) -+lookup, earliest effective virtual deadline first tickless design, loosely based -+on EEVDF (earliest eligible virtual deadline first) and my previous Staircase -+Deadline scheduler, and evolved from the single runqueue O(n) BFS scheduler. -+Each component shall be described in order to understand the significance of, -+and reasoning for it. -+ -+ -+Design reasoning. -+ -+In BFS, the use of a single runqueue across all CPUs meant that each CPU would -+need to scan the entire runqueue looking for the process with the earliest -+deadline and schedule that next, regardless of which CPU it originally came -+from. This made BFS deterministic with respect to latency and provided -+guaranteed latencies dependent on number of processes and CPUs. 
The single -+runqueue, however, meant that all CPUs would compete for the single lock -+protecting it, which would lead to increasing lock contention as the number of -+CPUs rose and appeared to limit scalability of common workloads beyond 16 -+logical CPUs. Additionally, the O(n) lookup of the runqueue list obviously -+increased overhead proportionate to the number of queued proecesses and led to -+cache thrashing while iterating over the linked list. -+ -+MuQSS is an evolution of BFS, designed to maintain the same scheduling -+decision mechanism and be virtually deterministic without relying on the -+constrained design of the single runqueue by splitting out the single runqueue -+to be per-CPU and use skiplists instead of linked lists. -+ -+The original reason for going back to a single runqueue design for BFS was that -+once multiple runqueues are introduced, per-CPU or otherwise, there will be -+complex interactions as each runqueue will be responsible for the scheduling -+latency and fairness of the tasks only on its own runqueue, and to achieve -+fairness and low latency across multiple CPUs, any advantage in throughput of -+having CPU local tasks causes other disadvantages. This is due to requiring a -+very complex balancing system to at best achieve some semblance of fairness -+across CPUs and can only maintain relatively low latency for tasks bound to the -+same CPUs, not across them. To increase said fairness and latency across CPUs, -+the advantage of local runqueue locking, which makes for better scalability, is -+lost due to having to grab multiple locks. -+ -+MuQSS works around the problems inherent in multiple runqueue designs by -+making its skip lists priority ordered and through novel use of lockless -+examination of each other runqueue it can decide if it should take the earliest -+deadline task from another runqueue for latency reasons, or for CPU balancing -+reasons. It still does not have a balancing system, choosing to allow the -+next task scheduling decision and task wakeup CPU choice to allow balancing to -+happen by virtue of its choices. -+ -+As a further evolution of the design, MuQSS normally configures sharing of -+runqueues in a logical fashion for when CPU resources are shared for improved -+latency and throughput. By default it shares runqueues and locks between -+multicore siblings. Optionally it can be configured to run with sharing of -+SMT siblings only, all SMP packages or no sharing at all. Additionally it can -+be selected at boot time. -+ -+ -+Design details. -+ -+Custom skip list implementation: -+ -+To avoid the overhead of building up and tearing down skip list structures, -+the variant used by MuQSS has a number of optimisations making it specific for -+its use case in the scheduler. It uses static arrays of 8 'levels' instead of -+building up and tearing down structures dynamically. This makes each runqueue -+only scale O(log N) up to 64k tasks. However as there is one runqueue per CPU -+it means that it scales O(log N) up to 64k x number of logical CPUs which is -+far beyond the realistic task limits each CPU could handle. By being 8 levels -+it also makes the array exactly one cacheline in size. Additionally, each -+skip list node is bidirectional making insertion and removal amortised O(1), -+being O(k) where k is 1-8. Uniquely, we are only ever interested in the very -+first entry in each list at all times with MuQSS, so there is never a need to -+do a search and thus look up is always O(1). 
In interactive mode, the queues -+will be searched beyond their first entry if the first task is not suitable -+for affinity or SMT nice reasons. -+ -+Task insertion: -+ -+MuQSS inserts tasks into a per CPU runqueue as an O(log N) insertion into -+a custom skip list as described above (based on the original design by William -+Pugh). Insertion is ordered in such a way that there is never a need to do a -+search by ordering tasks according to static priority primarily, and then -+virtual deadline at the time of insertion. -+ -+Niffies: -+ -+Niffies are a monotonic forward moving timer not unlike the "jiffies" but are -+of nanosecond resolution. Niffies are calculated per-runqueue from the high -+resolution TSC timers, and in order to maintain fairness are synchronised -+between CPUs whenever both runqueues are locked concurrently. -+ -+Virtual deadline: -+ -+The key to achieving low latency, scheduling fairness, and "nice level" -+distribution in MuQSS is entirely in the virtual deadline mechanism. The one -+tunable in MuQSS is the rr_interval, or "round robin interval". This is the -+maximum time two SCHED_OTHER (or SCHED_NORMAL, the common scheduling policy) -+tasks of the same nice level will be running for, or looking at it the other -+way around, the longest duration two tasks of the same nice level will be -+delayed for. When a task requests cpu time, it is given a quota (time_slice) -+equal to the rr_interval and a virtual deadline. The virtual deadline is -+offset from the current time in niffies by this equation: -+ -+ niffies + (prio_ratio * rr_interval) -+ -+The prio_ratio is determined as a ratio compared to the baseline of nice -20 -+and increases by 10% per nice level. The deadline is a virtual one only in that -+no guarantee is placed that a task will actually be scheduled by this time, but -+it is used to compare which task should go next. There are three components to -+how a task is next chosen. First is time_slice expiration. If a task runs out -+of its time_slice, it is descheduled, the time_slice is refilled, and the -+deadline reset to that formula above. Second is sleep, where a task no longer -+is requesting CPU for whatever reason. The time_slice and deadline are _not_ -+adjusted in this case and are just carried over for when the task is next -+scheduled. Third is preemption, and that is when a newly waking task is deemed -+higher priority than a currently running task on any cpu by virtue of the fact -+that it has an earlier virtual deadline than the currently running task. The -+earlier deadline is the key to which task is next chosen for the first and -+second cases. -+ -+The CPU proportion of different nice tasks works out to be approximately the -+ -+ (prio_ratio difference)^2 -+ -+The reason it is squared is that a task's deadline does not change while it is -+running unless it runs out of time_slice. Thus, even if the time actually -+passes the deadline of another task that is queued, it will not get CPU time -+unless the current running task deschedules, and the time "base" (niffies) is -+constantly moving. -+ -+Task lookup: -+ -+As tasks are already pre-ordered according to anticipated scheduling order in -+the skip lists, lookup for the next suitable task per-runqueue is always a -+matter of simply selecting the first task in the 0th level skip list entry. -+In order to maintain optimal latency and fairness across CPUs, MuQSS does a -+novel examination of every other runqueue in cache locality order, choosing the -+best task across all runqueues. 
This provides near-determinism of how long any -+task across the entire system may wait before receiving CPU time. The other -+runqueues are first examined locklessly and then trylocked to minimise the -+potential lock contention if they are likely to have a suitable better task. -+Each other runqueue lock is only held for as long as it takes to examine the -+entry for suitability. In "interactive" mode, the default setting, MuQSS will -+look for the best deadline task across all CPUs, while in !interactive mode, -+it will only select a better deadline task from another CPU if it is more -+heavily laden than the current one. -+ -+Lookup is therefore O(k) where k is the number of CPUs. -+ -+ -+Latency. -+ -+Through the use of virtual deadlines to govern the scheduling order of normal -+tasks, queue-to-activation latency per runqueue is guaranteed to be bound by -+the rr_interval tunable which is set to 6ms by default. This means that the -+longest a CPU bound task will wait for more CPU is proportional to the number -+of running tasks and in the common case of 0-2 running tasks per CPU, will be -+under the 7ms threshold for human perception of jitter. Additionally, as newly -+woken tasks will have an early deadline from their previous runtime, the very -+tasks that are usually latency sensitive will have the shortest interval for -+activation, usually preempting any existing CPU bound tasks. -+ -+Tickless expiry: -+ -+A feature of MuQSS is that it is not tied to the resolution of the chosen tick -+rate in Hz, instead depending entirely on the high resolution timers where -+possible for sub-millisecond accuracy on timeouts regardless of the underlying -+tick rate. This allows MuQSS to be run with the low overhead of low Hz rates -+such as 100 by default, benefiting from the improved throughput and lower -+power usage it provides. Another advantage of this approach is that in -+combination with the Full No HZ option, which disables ticks on running task -+CPUs instead of just idle CPUs, the tick can be disabled at all times -+regardless of how many tasks are running instead of being limited to just one -+running task. Note that this option is NOT recommended for regular desktop -+users. -+ -+ -+Scalability and balancing. -+ -+Unlike traditional approaches where balancing is a combination of CPU selection -+at task wakeup and intermittent balancing based on a vast array of rules set -+according to architecture, busyness calculations and special case management, -+MuQSS indirectly balances on the fly at task wakeup and next task selection. -+During initialisation, MuQSS creates a cache coherency ordered list of CPUs for -+each logical CPU and uses this to aid task/CPU selection when CPUs are busy. -+Additionally it selects any idle CPUs, if they are available, at any time over -+busy CPUs according to the following preference: -+ -+ * Same thread, idle or busy cache, idle or busy threads -+ * Other core, same cache, idle or busy cache, idle threads. -+ * Same node, other CPU, idle cache, idle threads. -+ * Same node, other CPU, busy cache, idle threads. -+ * Other core, same cache, busy threads. -+ * Same node, other CPU, busy threads. -+ * Other node, other CPU, idle cache, idle threads. -+ * Other node, other CPU, busy cache, idle threads. -+ * Other node, other CPU, busy threads. -+ -+Mux is therefore SMT, MC and Numa aware without the need for extra -+intermittent balancing to maintain CPUs busy and make the most of cache -+coherency. 
-+ -+ -+Features -+ -+As the initial prime target audience for MuQSS was the average desktop user, it -+was designed to not need tweaking, tuning or have features set to obtain benefit -+from it. Thus the number of knobs and features has been kept to an absolute -+minimum and should not require extra user input for the vast majority of cases. -+There are 3 optional tunables, and 2 extra scheduling policies. The rr_interval, -+interactive, and iso_cpu tunables, and the SCHED_ISO and SCHED_IDLEPRIO -+policies. In addition to this, MuQSS also uses sub-tick accounting. What MuQSS -+does _not_ now feature is support for CGROUPS. The average user should neither -+need to know what these are, nor should they need to be using them to have good -+desktop behaviour. However since some applications refuse to work without -+cgroups, one can enable them with MuQSS as a stub and the filesystem will be -+created which will allow the applications to work. -+ -+rr_interval: -+ -+ /proc/sys/kernel/rr_interval -+ -+The value is in milliseconds, and the default value is set to 6. Valid values -+are from 1 to 1000. Decreasing the value will decrease latencies at the cost of -+decreasing throughput, while increasing it will improve throughput, but at the -+cost of worsening latencies. It is based on the fact that humans can detect -+jitter at approximately 7ms, so aiming for much lower latencies is pointless -+under most circumstances. It is worth noting this fact when comparing the -+latency performance of MuQSS to other schedulers. Worst case latencies being -+higher than 7ms are far worse than average latencies not being in the -+microsecond range. -+ -+interactive: -+ -+ /proc/sys/kernel/interactive -+ -+The value is a simple boolean of 1 for on and 0 for off and is set to on by -+default. Disabling this will disable the near-determinism of MuQSS when -+selecting the next task by not examining all CPUs for the earliest deadline -+task, or which CPU to wake to, instead prioritising CPU balancing for improved -+throughput. Latency will still be bound by rr_interval, but on a per-CPU basis -+instead of across the whole system. -+ -+Runqueue sharing. -+ -+By default MuQSS chooses to share runqueue resources (specifically the skip -+list and locking) between multicore siblings. It is configurable at build time -+to select between None, SMT, MC and SMP, corresponding to no sharing, sharing -+only between simultaneous multithreading siblings, multicore siblings, or -+symmetric multiprocessing physical packages. Additionally it can be set at -+boot time with the use of the rqshare parameter. The reason for configurability -+is that some architectures have CPUs with many multicore siblings (>= 16) -+where it may be detrimental to throughput to share runqueues and another -+sharing option may be desirable. Additionally, more sharing than usual can -+improve latency on a system-wide level at the expense of throughput if desired. -+ -+The options are: -+none, smt, mc, smp -+ -+eg: -+ rqshare=mc -+ -+Isochronous scheduling: -+ -+Isochronous scheduling is a unique scheduling policy designed to provide -+near-real-time performance to unprivileged (ie non-root) users without the -+ability to starve the machine indefinitely. Isochronous tasks (which means -+"same time") are set using, for example, the schedtool application like so: -+ -+ schedtool -I -e amarok -+ -+This will start the audio application "amarok" as SCHED_ISO. 
How SCHED_ISO works -+is that it has a priority level between true realtime tasks and SCHED_NORMAL -+which would allow them to preempt all normal tasks, in a SCHED_RR fashion (ie, -+if multiple SCHED_ISO tasks are running, they purely round robin at rr_interval -+rate). However if ISO tasks run for more than a tunable finite amount of time, -+they are then demoted back to SCHED_NORMAL scheduling. This finite amount of -+time is the percentage of CPU available per CPU, configurable as a percentage in -+the following "resource handling" tunable (as opposed to a scheduler tunable): -+ -+iso_cpu: -+ -+ /proc/sys/kernel/iso_cpu -+ -+and is set to 70% by default. It is calculated over a rolling 5 second average -+Because it is the total CPU available, it means that on a multi CPU machine, it -+is possible to have an ISO task running as realtime scheduling indefinitely on -+just one CPU, as the other CPUs will be available. Setting this to 100 is the -+equivalent of giving all users SCHED_RR access and setting it to 0 removes the -+ability to run any pseudo-realtime tasks. -+ -+A feature of MuQSS is that it detects when an application tries to obtain a -+realtime policy (SCHED_RR or SCHED_FIFO) and the caller does not have the -+appropriate privileges to use those policies. When it detects this, it will -+give the task SCHED_ISO policy instead. Thus it is transparent to the user. -+ -+ -+Idleprio scheduling: -+ -+Idleprio scheduling is a scheduling policy designed to give out CPU to a task -+_only_ when the CPU would be otherwise idle. The idea behind this is to allow -+ultra low priority tasks to be run in the background that have virtually no -+effect on the foreground tasks. This is ideally suited to distributed computing -+clients (like setiathome, folding, mprime etc) but can also be used to start a -+video encode or so on without any slowdown of other tasks. To avoid this policy -+from grabbing shared resources and holding them indefinitely, if it detects a -+state where the task is waiting on I/O, the machine is about to suspend to ram -+and so on, it will transiently schedule them as SCHED_NORMAL. Once a task has -+been scheduled as IDLEPRIO, it cannot be put back to SCHED_NORMAL without -+superuser privileges since it is effectively a lower scheduling policy. Tasks -+can be set to start as SCHED_IDLEPRIO with the schedtool command like so: -+ -+schedtool -D -e ./mprime -+ -+Subtick accounting: -+ -+It is surprisingly difficult to get accurate CPU accounting, and in many cases, -+the accounting is done by simply determining what is happening at the precise -+moment a timer tick fires off. This becomes increasingly inaccurate as the timer -+tick frequency (HZ) is lowered. It is possible to create an application which -+uses almost 100% CPU, yet by being descheduled at the right time, records zero -+CPU usage. While the main problem with this is that there are possible security -+implications, it is also difficult to determine how much CPU a task really does -+use. Mux uses sub-tick accounting from the TSC clock to determine real CPU -+usage. Thus, the amount of CPU reported as being used by MuQSS will more -+accurately represent how much CPU the task itself is using (as is shown for -+example by the 'time' application), so the reported values may be quite -+different to other schedulers. 
When comparing throughput of MuQSS to other -+designs, it is important to compare the actual completed work in terms of total -+wall clock time taken and total work done, rather than the reported "cpu usage". -+ -+Symmetric MultiThreading (SMT) aware nice: -+ -+SMT, a.k.a. hyperthreading, is a very common feature on modern CPUs. While the -+logical CPU count rises by adding thread units to each CPU core, allowing more -+than one task to be run simultaneously on the same core, the disadvantage of it -+is that the CPU power is shared between the tasks, not summating to the power -+of two CPUs. The practical upshot of this is that two tasks running on -+separate threads of the same core run significantly slower than if they had one -+core each to run on. While smart CPU selection allows each task to have a core -+to itself whenever available (as is done on MuQSS), it cannot offset the -+slowdown that occurs when the cores are all loaded and only a thread is left. -+Most of the time this is harmless as the CPU is effectively overloaded at this -+point and the extra thread is of benefit. However when running a niced task in -+the presence of an un-niced task (say nice 19 v nice 0), the nice task gets -+precisely the same amount of CPU power as the unniced one. MuQSS has an -+optional configuration feature known as SMT-NICE which selectively idles the -+secondary niced thread for a period proportional to the nice difference, -+allowing CPU distribution according to nice level to be maintained, at the -+expense of a small amount of extra overhead. If this is configured in on a -+machine without SMT threads, the overhead is minimal. -+ -+ -+Con Kolivas Sat, 29th October 2016 -diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig -index ef179033a7c2..14b576a531ad 100644 ---- a/arch/alpha/Kconfig -+++ b/arch/alpha/Kconfig -@@ -665,6 +665,8 @@ config HZ - default 1200 if HZ_1200 - default 1024 - -+source "kernel/Kconfig.MuQSS" -+ - config SRM_ENV - tristate "SRM environment through procfs" - depends on PROC_FS -diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig -index 8a50efb559f3..d8507d20c258 100644 ---- a/arch/arm/Kconfig -+++ b/arch/arm/Kconfig -@@ -1238,6 +1238,8 @@ config SCHED_SMT - MultiThreading at a cost of slightly increased overhead in some - places. If unsure say N here. - -+source "kernel/Kconfig.MuQSS" -+ - config HAVE_ARM_SCU - bool - help -diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig -index 3f047afb982c..d35eae0a5c7d 100644 ---- a/arch/arm64/Kconfig -+++ b/arch/arm64/Kconfig -@@ -864,6 +864,8 @@ config SCHED_SMT - MultiThreading at a cost of slightly increased overhead in some - places. If unsure say N here. - -+source "kernel/Kconfig.MuQSS" -+ - config NR_CPUS - int "Maximum number of CPUs (2-4096)" - range 2 4096 -diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig -index 3e56c9c2f16e..ecee9c2a0062 100644 ---- a/arch/powerpc/Kconfig -+++ b/arch/powerpc/Kconfig -@@ -853,6 +853,8 @@ config SCHED_SMT - when dealing with POWER5 cpus at a cost of slightly increased - overhead in some places. If unsure say N here. 
- -+source "kernel/Kconfig.MuQSS" -+ - config PPC_DENORMALISATION - bool "PowerPC denormalisation exception handling" - depends on PPC_BOOK3S_64 -diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c -index f18d5067cd0f..fe489fc01c73 100644 ---- a/arch/powerpc/platforms/cell/spufs/sched.c -+++ b/arch/powerpc/platforms/cell/spufs/sched.c -@@ -51,11 +51,6 @@ static struct task_struct *spusched_task; - static struct timer_list spusched_timer; - static struct timer_list spuloadavg_timer; - --/* -- * Priority of a normal, non-rt, non-niced'd process (aka nice level 0). -- */ --#define NORMAL_PRIO 120 -- - /* - * Frequency of the spu scheduler tick. By default we do one SPU scheduler - * tick for every 10 CPU scheduler ticks. -diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig -index 8ef85139553f..7299015f6252 100644 ---- a/arch/x86/Kconfig -+++ b/arch/x86/Kconfig -@@ -1034,6 +1034,22 @@ config NR_CPUS - config SCHED_SMT - def_bool y if SMP - -+config SMT_NICE -+ bool "SMT (Hyperthreading) aware nice priority and policy support" -+ depends on SCHED_MUQSS && SCHED_SMT -+ default y -+ ---help--- -+ Enabling Hyperthreading on Intel CPUs decreases the effectiveness -+ of the use of 'nice' levels and different scheduling policies -+ (e.g. realtime) due to sharing of CPU power between hyperthreads. -+ SMT nice support makes each logical CPU aware of what is running on -+ its hyperthread siblings, maintaining appropriate distribution of -+ CPU according to nice levels and scheduling policies at the expense -+ of slightly increased overhead. -+ -+ If unsure say Y here. -+ -+ - config SCHED_MC - def_bool y - prompt "Multi-core scheduler support" -@@ -1064,6 +1080,8 @@ config SCHED_MC_PRIO - - If unsure say Y here. - -+source "kernel/Kconfig.MuQSS" -+ - config UP_LATE_INIT - def_bool y - depends on !SMP && X86_LOCAL_APIC -diff --git a/fs/proc/base.c b/fs/proc/base.c -index ebea9501afb8..51c9346a69fe 100644 ---- a/fs/proc/base.c -+++ b/fs/proc/base.c -@@ -477,7 +477,7 @@ static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns, - seq_puts(m, "0 0 0\n"); - else - seq_printf(m, "%llu %llu %lu\n", -- (unsigned long long)task->se.sum_exec_runtime, -+ (unsigned long long)tsk_seruntime(task), - (unsigned long long)task->sched_info.run_delay, - task->sched_info.pcount); - -diff --git a/include/linux/init_task.h b/include/linux/init_task.h -index 2c620d7ac432..73417df5daa2 100644 ---- a/include/linux/init_task.h -+++ b/include/linux/init_task.h -@@ -36,7 +36,11 @@ extern struct cred init_cred; - #define INIT_PREV_CPUTIME(x) - #endif - -+#ifdef CONFIG_SCHED_MUQSS -+#define INIT_TASK_COMM "MuQSS" -+#else - #define INIT_TASK_COMM "swapper" -+#endif - - /* Attach to the init_task data structure for proper alignment */ - #ifdef CONFIG_ARCH_TASK_STRUCT_ON_STACK -diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h -index e9bfe6972aed..16ba1c7e5bde 100644 ---- a/include/linux/ioprio.h -+++ b/include/linux/ioprio.h -@@ -53,6 +53,8 @@ enum { - */ - static inline int task_nice_ioprio(struct task_struct *task) - { -+ if (iso_task(task)) -+ return 0; - return (task_nice(task) + 20) / 5; - } - -diff --git a/include/linux/sched.h b/include/linux/sched.h -index 67a1d86981a9..0849781f069b 100644 ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -31,6 +31,9 @@ - #include - #include - #include -+#ifdef CONFIG_SCHED_MUQSS -+#include -+#endif - - /* task_struct member predeclarations (sorted alphabetically): */ - struct audit_context; -@@ -644,9 +647,11 
@@ struct task_struct { - unsigned int flags; - unsigned int ptrace; - -+#if defined(CONFIG_SMP) || defined(CONFIG_SCHED_MUQSS) -+ int on_cpu; -+#endif - #ifdef CONFIG_SMP - struct llist_node wake_entry; -- int on_cpu; - #ifdef CONFIG_THREAD_INFO_IN_TASK - /* Current CPU: */ - unsigned int cpu; -@@ -671,10 +676,25 @@ struct task_struct { - int static_prio; - int normal_prio; - unsigned int rt_priority; -+#ifdef CONFIG_SCHED_MUQSS -+ int time_slice; -+ u64 deadline; -+ skiplist_node node; /* Skip list node */ -+ u64 last_ran; -+ u64 sched_time; /* sched_clock time spent running */ -+#ifdef CONFIG_SMT_NICE -+ int smt_bias; /* Policy/nice level bias across smt siblings */ -+#endif -+#ifdef CONFIG_HOTPLUG_CPU -+ bool zerobound; /* Bound to CPU0 for hotplug */ -+#endif -+ unsigned long rt_timeout; -+#else /* CONFIG_SCHED_MUQSS */ - - const struct sched_class *sched_class; - struct sched_entity se; - struct sched_rt_entity rt; -+#endif - #ifdef CONFIG_CGROUP_SCHED - struct task_group *sched_task_group; - #endif -@@ -839,6 +859,10 @@ struct task_struct { - #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME - u64 utimescaled; - u64 stimescaled; -+#endif -+#ifdef CONFIG_SCHED_MUQSS -+ /* Unbanked cpu time */ -+ unsigned long utime_ns, stime_ns; - #endif - u64 gtime; - struct prev_cputime prev_cputime; -@@ -1283,6 +1307,40 @@ struct task_struct { - */ - }; - -+#ifdef CONFIG_SCHED_MUQSS -+#define tsk_seruntime(t) ((t)->sched_time) -+#define tsk_rttimeout(t) ((t)->rt_timeout) -+ -+static inline void tsk_cpus_current(struct task_struct *p) -+{ -+} -+ -+void print_scheduler_version(void); -+ -+static inline bool iso_task(struct task_struct *p) -+{ -+ return (p->policy == SCHED_ISO); -+} -+#else /* CFS */ -+#define tsk_seruntime(t) ((t)->se.sum_exec_runtime) -+#define tsk_rttimeout(t) ((t)->rt.timeout) -+ -+static inline void tsk_cpus_current(struct task_struct *p) -+{ -+ p->nr_cpus_allowed = current->nr_cpus_allowed; -+} -+ -+static inline void print_scheduler_version(void) -+{ -+ printk(KERN_INFO "CFS CPU scheduler.\n"); -+} -+ -+static inline bool iso_task(struct task_struct *p) -+{ -+ return false; -+} -+#endif /* CONFIG_SCHED_MUQSS */ -+ - static inline struct pid *task_pid(struct task_struct *task) - { - return task->thread_pid; -diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h -index 1aff00b65f3c..73d6319a856a 100644 ---- a/include/linux/sched/deadline.h -+++ b/include/linux/sched/deadline.h -@@ -28,7 +28,16 @@ static inline bool dl_time_before(u64 a, u64 b) - #ifdef CONFIG_SMP - - struct root_domain; -+#ifdef CONFIG_SCHED_MUQSS -+static inline void dl_clear_root_domain(struct root_domain *rd) -+{ -+} -+static inline void dl_add_task_root_domain(struct task_struct *p) -+{ -+} -+#else /* CONFIG_SCHED_MUQSS */ - extern void dl_add_task_root_domain(struct task_struct *p); - extern void dl_clear_root_domain(struct root_domain *rd); -+#endif /* CONFIG_SCHED_MUQSS */ - - #endif /* CONFIG_SMP */ -diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h -index 1abe91ff6e4a..20ba383562b0 100644 ---- a/include/linux/sched/nohz.h -+++ b/include/linux/sched/nohz.h -@@ -13,7 +13,7 @@ extern int get_nohz_timer_target(void); - static inline void nohz_balance_enter_idle(int cpu) { } - #endif - --#ifdef CONFIG_NO_HZ_COMMON -+#if defined(CONFIG_NO_HZ_COMMON) && !defined(CONFIG_SCHED_MUQSS) - void calc_load_nohz_start(void); - void calc_load_nohz_stop(void); - #else -diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h -index 7d64feafc408..43c9d9e50c09 100644 ---- 
a/include/linux/sched/prio.h -+++ b/include/linux/sched/prio.h -@@ -20,8 +20,20 @@ - */ - - #define MAX_USER_RT_PRIO 100 -+ -+#ifdef CONFIG_SCHED_MUQSS -+/* Note different MAX_RT_PRIO */ -+#define MAX_RT_PRIO (MAX_USER_RT_PRIO + 1) -+ -+#define ISO_PRIO (MAX_RT_PRIO) -+#define NORMAL_PRIO (MAX_RT_PRIO + 1) -+#define IDLE_PRIO (MAX_RT_PRIO + 2) -+#define PRIO_LIMIT ((IDLE_PRIO) + 1) -+#else /* CONFIG_SCHED_MUQSS */ - #define MAX_RT_PRIO MAX_USER_RT_PRIO - -+#endif /* CONFIG_SCHED_MUQSS */ -+ - #define MAX_PRIO (MAX_RT_PRIO + NICE_WIDTH) - #define DEFAULT_PRIO (MAX_RT_PRIO + NICE_WIDTH / 2) - -diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h -index e5af028c08b4..010b2244e0b6 100644 ---- a/include/linux/sched/rt.h -+++ b/include/linux/sched/rt.h -@@ -24,8 +24,10 @@ static inline bool task_is_realtime(struct task_struct *tsk) - - if (policy == SCHED_FIFO || policy == SCHED_RR) - return true; -+#ifndef CONFIG_SCHED_MUQSS - if (policy == SCHED_DEADLINE) - return true; -+#endif - return false; - } - -diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h -index 4b1c3b664f51..a9671b48799c 100644 ---- a/include/linux/sched/task.h -+++ b/include/linux/sched/task.h -@@ -99,7 +99,7 @@ extern long kernel_wait4(pid_t, int __user *, int, struct rusage *); - extern void free_task(struct task_struct *tsk); - - /* sched_exec is called by processes performing an exec */ --#ifdef CONFIG_SMP -+#if defined(CONFIG_SMP) && !defined(CONFIG_SCHED_MUQSS) - extern void sched_exec(void); - #else - #define sched_exec() {} -diff --git a/include/linux/skip_list.h b/include/linux/skip_list.h -new file mode 100644 -index 000000000000..d4be84ba273b ---- /dev/null -+++ b/include/linux/skip_list.h -@@ -0,0 +1,33 @@ -+#ifndef _LINUX_SKIP_LISTS_H -+#define _LINUX_SKIP_LISTS_H -+typedef u64 keyType; -+typedef void *valueType; -+ -+typedef struct nodeStructure skiplist_node; -+ -+struct nodeStructure { -+ int level; /* Levels in this structure */ -+ keyType key; -+ valueType value; -+ skiplist_node *next[8]; -+ skiplist_node *prev[8]; -+}; -+ -+typedef struct listStructure { -+ int entries; -+ int level; /* Maximum level of the list -+ (1 more than the number of levels in the list) */ -+ skiplist_node *header; /* pointer to header */ -+} skiplist; -+ -+void skiplist_init(skiplist_node *slnode); -+skiplist *new_skiplist(skiplist_node *slnode); -+void free_skiplist(skiplist *l); -+void skiplist_node_init(skiplist_node *node); -+void skiplist_insert(skiplist *l, skiplist_node *node, keyType key, valueType value, unsigned int randseed); -+void skiplist_delete(skiplist *l, skiplist_node *node); -+ -+static inline bool skiplist_node_empty(skiplist_node *node) { -+ return (!node->next[0]); -+} -+#endif /* _LINUX_SKIP_LISTS_H */ -diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h -index 25b4fa00bad1..c2503cd28025 100644 ---- a/include/uapi/linux/sched.h -+++ b/include/uapi/linux/sched.h -@@ -84,9 +84,16 @@ struct clone_args { - #define SCHED_FIFO 1 - #define SCHED_RR 2 - #define SCHED_BATCH 3 --/* SCHED_ISO: reserved but not implemented yet */ -+/* SCHED_ISO: Implemented on MuQSS only */ - #define SCHED_IDLE 5 -+#ifdef CONFIG_SCHED_MUQSS -+#define SCHED_ISO 4 -+#define SCHED_IDLEPRIO SCHED_IDLE -+#define SCHED_MAX (SCHED_IDLEPRIO) -+#define SCHED_RANGE(policy) ((policy) <= SCHED_MAX) -+#else /* CONFIG_SCHED_MUQSS */ - #define SCHED_DEADLINE 6 -+#endif /* CONFIG_SCHED_MUQSS */ - - /* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */ - #define 
SCHED_RESET_ON_FORK 0x40000000 -diff --git a/init/Kconfig b/init/Kconfig -index b4daad2bac23..da90d33ba4b3 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -73,6 +73,18 @@ config THREAD_INFO_IN_TASK - - menu "General setup" - -+config SCHED_MUQSS -+ bool "MuQSS cpu scheduler" -+ select HIGH_RES_TIMERS -+ ---help--- -+ The Multiple Queue Skiplist Scheduler for excellent interactivity and -+ responsiveness on the desktop and highly scalable deterministic -+ low latency on any hardware. -+ -+ Say Y here. -+ default y -+ -+ - config BROKEN - bool - -@@ -802,6 +814,7 @@ config NUMA_BALANCING - depends on ARCH_SUPPORTS_NUMA_BALANCING - depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY - depends on SMP && NUMA && MIGRATION -+ depends on !SCHED_MUQSS - help - This option adds support for automatic NUMA aware memory/task placement. - The mechanism is quite primitive and is based on migrating memory when -@@ -901,9 +914,13 @@ menuconfig CGROUP_SCHED - help - This feature lets CPU scheduler recognize task groups and control CPU - bandwidth allocation to such task groups. It uses cgroups to group -- tasks. -+ tasks. In combination with MuQSS this is purely a STUB to create the -+ files associated with the CPU controller cgroup but most of the -+ controls do nothing. This is useful for working in environments and -+ with applications that will only work if this control group is -+ present. - --if CGROUP_SCHED -+if CGROUP_SCHED && !SCHED_MUQSS - config FAIR_GROUP_SCHED - bool "Group scheduling for SCHED_OTHER" - depends on CGROUP_SCHED -@@ -1032,6 +1049,7 @@ config CGROUP_DEVICE - - config CGROUP_CPUACCT - bool "Simple CPU accounting controller" -+ depends on !SCHED_MUQSS - help - Provides a simple controller for monitoring the - total CPU consumed by the tasks in a cgroup. 
-@@ -1150,6 +1168,7 @@ config CHECKPOINT_RESTORE - - config SCHED_AUTOGROUP - bool "Automatic process group scheduling" -+ depends on !SCHED_MUQSS - select CGROUPS - select CGROUP_SCHED - select FAIR_GROUP_SCHED -diff --git a/init/init_task.c b/init/init_task.c -index 9e5cbe5eab7b..5c2bcbf25add 100644 ---- a/init/init_task.c -+++ b/init/init_task.c -@@ -66,9 +66,17 @@ struct task_struct init_task - .stack = init_stack, - .usage = REFCOUNT_INIT(2), - .flags = PF_KTHREAD, -+#ifdef CONFIG_SCHED_MUQSS -+ .prio = NORMAL_PRIO, -+ .static_prio = MAX_PRIO - 20, -+ .normal_prio = NORMAL_PRIO, -+ .deadline = 0, -+ .time_slice = 1000000, -+#else - .prio = MAX_PRIO - 20, - .static_prio = MAX_PRIO - 20, - .normal_prio = MAX_PRIO - 20, -+#endif - .policy = SCHED_NORMAL, - .cpus_ptr = &init_task.cpus_mask, - .cpus_mask = CPU_MASK_ALL, -@@ -78,6 +86,7 @@ struct task_struct init_task - .restart_block = { - .fn = do_no_restart_syscall, - }, -+#ifndef CONFIG_SCHED_MUQSS - .se = { - .group_node = LIST_HEAD_INIT(init_task.se.group_node), - }, -@@ -85,6 +94,7 @@ struct task_struct init_task - .run_list = LIST_HEAD_INIT(init_task.rt.run_list), - .time_slice = RR_TIMESLICE, - }, -+#endif - .tasks = LIST_HEAD_INIT(init_task.tasks), - #ifdef CONFIG_SMP - .pushable_tasks = PLIST_NODE_INIT(init_task.pushable_tasks, MAX_PRIO), -diff --git a/init/main.c b/init/main.c -index 91f6ebb30ef0..22792032de64 100644 ---- a/init/main.c -+++ b/init/main.c -@@ -1124,6 +1124,8 @@ static int __ref kernel_init(void *unused) - - rcu_end_inkernel_boot(); - -+ print_scheduler_version(); -+ - if (ramdisk_execute_command) { - ret = run_init_process(ramdisk_execute_command); - if (!ret) -diff --git a/kernel/Kconfig.MuQSS b/kernel/Kconfig.MuQSS -new file mode 100644 -index 000000000000..a6a58781ef91 ---- /dev/null -+++ b/kernel/Kconfig.MuQSS -@@ -0,0 +1,105 @@ -+choice -+ prompt "CPU scheduler runqueue sharing" -+ default RQ_MC if SCHED_MUQSS -+ default RQ_NONE -+ -+config RQ_NONE -+ bool "No sharing" -+ help -+ This is the default behaviour where the CPU scheduler has one runqueue -+ per CPU, whether it is a physical or logical CPU (hyperthread). -+ -+ This can still be enabled runtime with the boot parameter -+ rqshare=none -+ -+ If unsure, say N. -+ -+config RQ_SMT -+ bool "SMT (hyperthread) siblings" -+ depends on SCHED_SMT && SCHED_MUQSS -+ -+ help -+ With this option enabled, the CPU scheduler will have one runqueue -+ shared by SMT (hyperthread) siblings. As these logical cores share -+ one physical core, sharing the runqueue resource can lead to decreased -+ overhead, lower latency and higher throughput. -+ -+ This can still be enabled runtime with the boot parameter -+ rqshare=smt -+ -+ If unsure, say N. -+ -+config RQ_MC -+ bool "Multicore siblings" -+ depends on SCHED_MC && SCHED_MUQSS -+ help -+ With this option enabled, the CPU scheduler will have one runqueue -+ shared by multicore siblings in addition to any SMT siblings. -+ As these physical cores share caches, sharing the runqueue resource -+ will lead to lower latency, but its effects on overhead and throughput -+ are less predictable. As a general rule, 6 or fewer cores will likely -+ benefit from this, while larger CPUs will only derive a latency -+ benefit. If your workloads are primarily single threaded, this will -+ possibly worsen throughput. If you are only concerned about latency -+ then enable this regardless of how many cores you have. -+ -+ This can still be enabled runtime with the boot parameter -+ rqshare=mc -+ -+ If unsure, say Y. 
-+ -+config RQ_MC_LLC -+ bool "Multicore siblings (LLC)" -+ depends on SCHED_MC && SCHED_MUQSS -+ help -+ With this option enabled, the CPU scheduler will behave similarly as -+ with "Multicore siblings". -+ This option takes LLC cache into account when scheduling tasks. -+ Option may benefit CPUs with multiple LLC caches, such as Ryzen -+ and Xeon CPUs. -+ -+ This can still be enabled runtime with the boot parameter -+ rqshare=llc -+ -+ If unsure, say N. -+ -+config RQ_SMP -+ bool "Symmetric Multi-Processing" -+ depends on SMP && SCHED_MUQSS -+ help -+ With this option enabled, the CPU scheduler will have one runqueue -+ shared by all physical CPUs unless they are on separate NUMA nodes. -+ As physical CPUs usually do not share resources, sharing the runqueue -+ will normally worsen throughput but improve latency. If you only -+ care about latency enable this. -+ -+ This can still be enabled runtime with the boot parameter -+ rqshare=smp -+ -+ If unsure, say N. -+ -+config RQ_ALL -+ bool "NUMA" -+ depends on SMP && SCHED_MUQSS -+ help -+ With this option enabled, the CPU scheduler will have one runqueue -+ regardless of the architecture configuration, including across NUMA -+ nodes. This can substantially decrease throughput in NUMA -+ configurations, but light NUMA designs will not be dramatically -+ affected. This option should only be chosen if latency is the prime -+ concern. -+ -+ This can still be enabled runtime with the boot parameter -+ rqshare=all -+ -+ If unsure, say N. -+endchoice -+ -+config SHARERQ -+ int -+ default 0 if RQ_NONE -+ default 1 if RQ_SMT -+ default 2 if RQ_MC -+ default 3 if RQ_MC_LLC -+ default 4 if RQ_SMP -+ default 5 if RQ_ALL -diff --git a/kernel/Makefile b/kernel/Makefile -index daad787fb795..9bb44fc4ef5b 100644 ---- a/kernel/Makefile -+++ b/kernel/Makefile -@@ -10,7 +10,7 @@ obj-y = fork.o exec_domain.o panic.o \ - extable.o params.o \ - kthread.o sys_ni.o nsproxy.o \ - notifier.o ksysfs.o cred.o reboot.o \ -- async.o range.o smpboot.o ucount.o -+ async.o range.o smpboot.o ucount.o skip_list.o - - obj-$(CONFIG_MODULES) += kmod.o - obj-$(CONFIG_MULTIUSER) += groups.o -diff --git a/kernel/delayacct.c b/kernel/delayacct.c -index 27725754ac99..769d773c7182 100644 ---- a/kernel/delayacct.c -+++ b/kernel/delayacct.c -@@ -106,7 +106,7 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) - */ - t1 = tsk->sched_info.pcount; - t2 = tsk->sched_info.run_delay; -- t3 = tsk->se.sum_exec_runtime; -+ t3 = tsk_seruntime(tsk); - - d->cpu_count += t1; - -diff --git a/kernel/exit.c b/kernel/exit.c -index a46a50d67002..58043176b285 100644 ---- a/kernel/exit.c -+++ b/kernel/exit.c -@@ -131,7 +131,7 @@ static void __exit_signal(struct task_struct *tsk) - sig->curr_target = next_thread(tsk); - } - -- add_device_randomness((const void*) &tsk->se.sum_exec_runtime, -+ add_device_randomness((const void*) &tsk_seruntime(tsk), - sizeof(unsigned long long)); - - /* -@@ -152,7 +152,7 @@ static void __exit_signal(struct task_struct *tsk) - sig->inblock += task_io_get_inblock(tsk); - sig->oublock += task_io_get_oublock(tsk); - task_io_accounting_add(&sig->ioac, &tsk->ioac); -- sig->sum_sched_runtime += tsk->se.sum_exec_runtime; -+ sig->sum_sched_runtime += tsk_seruntime(tsk); - sig->nr_threads--; - __unhash_process(tsk, group_dead); - write_sequnlock(&sig->stats_lock); -diff --git a/kernel/kthread.c b/kernel/kthread.c -index b262f47046ca..9797ad652268 100644 ---- a/kernel/kthread.c -+++ b/kernel/kthread.c -@@ -433,6 +433,34 @@ void kthread_bind(struct task_struct *p, 
unsigned int cpu) - } - EXPORT_SYMBOL(kthread_bind); - -+#if defined(CONFIG_SCHED_MUQSS) && defined(CONFIG_SMP) -+extern void __do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask); -+ -+/* -+ * new_kthread_bind is a special variant of __kthread_bind_mask. -+ * For new threads to work on muqss we want to call do_set_cpus_allowed -+ * without the task_cpu being set and the task rescheduled until they're -+ * rescheduled on their own so we call __do_set_cpus_allowed directly which -+ * only changes the cpumask. This is particularly important for smpboot threads -+ * to work. -+ */ -+static void new_kthread_bind(struct task_struct *p, unsigned int cpu) -+{ -+ unsigned long flags; -+ -+ if (WARN_ON(!wait_task_inactive(p, TASK_UNINTERRUPTIBLE))) -+ return; -+ -+ /* It's safe because the task is inactive. */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ __do_set_cpus_allowed(p, cpumask_of(cpu)); -+ p->flags |= PF_NO_SETAFFINITY; -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+} -+#else -+#define new_kthread_bind(p, cpu) kthread_bind(p, cpu) -+#endif -+ - /** - * kthread_create_on_cpu - Create a cpu bound kthread - * @threadfn: the function to run until signal_pending(current). -@@ -454,7 +482,7 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data), - cpu); - if (IS_ERR(p)) - return p; -- kthread_bind(p, cpu); -+ new_kthread_bind(p, cpu); - /* CPU hotplug need to bind once again when unparking the thread. */ - set_bit(KTHREAD_IS_PER_CPU, &to_kthread(p)->flags); - to_kthread(p)->cpu = cpu; -diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c -index cdf318d86dd6..304c0c8c2bea 100644 ---- a/kernel/livepatch/transition.c -+++ b/kernel/livepatch/transition.c -@@ -282,7 +282,7 @@ static bool klp_try_switch_task(struct task_struct *task) - { - static char err_buf[STACK_ERR_BUF_SIZE]; - struct rq *rq; -- struct rq_flags flags; -+ struct rq_flags rf; - int ret; - bool success = false; - -@@ -304,7 +304,7 @@ static bool klp_try_switch_task(struct task_struct *task) - * functions. If all goes well, switch the task to the target patch - * state. 
- */ -- rq = task_rq_lock(task, &flags); -+ rq = task_rq_lock(task, &rf); - - if (task_running(rq, task) && task != current) { - snprintf(err_buf, STACK_ERR_BUF_SIZE, -@@ -323,7 +323,7 @@ static bool klp_try_switch_task(struct task_struct *task) - task->patch_state = klp_target_state; - - done: -- task_rq_unlock(rq, task, &flags); -+ task_rq_unlock(rq, task, &rf); - - /* - * Due to console deadlock issues, pr_debug() can't be used while -diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile -index 21fb5a5662b5..a04ffebc6b7a 100644 ---- a/kernel/sched/Makefile -+++ b/kernel/sched/Makefile -@@ -16,15 +16,23 @@ ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) - CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer - endif - -+ifdef CONFIG_SCHED_MUQSS -+obj-y += MuQSS.o clock.o cputime.o -+obj-y += idle.o -+obj-y += wait.o wait_bit.o swait.o completion.o -+ -+obj-$(CONFIG_SMP) += topology.o -+else - obj-y += core.o loadavg.o clock.o cputime.o - obj-y += idle.o fair.o rt.o deadline.o - obj-y += wait.o wait_bit.o swait.o completion.o - - obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o - obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o --obj-$(CONFIG_SCHEDSTATS) += stats.o - obj-$(CONFIG_SCHED_DEBUG) += debug.o - obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o -+endif -+obj-$(CONFIG_SCHEDSTATS) += stats.o - obj-$(CONFIG_CPU_FREQ) += cpufreq.o - obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o - obj-$(CONFIG_MEMBARRIER) += membarrier.o -diff --git a/kernel/sched/MuQSS.c b/kernel/sched/MuQSS.c -new file mode 100644 -index 000000000000..fafb5a790cf1 ---- /dev/null -+++ b/kernel/sched/MuQSS.c -@@ -0,0 +1,7606 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * kernel/sched/MuQSS.c, was kernel/sched.c -+ * -+ * Kernel scheduler and related syscalls -+ * -+ * Copyright (C) 1991-2002 Linus Torvalds -+ * -+ * 1996-12-23 Modified by Dave Grothe to fix bugs in semaphores and -+ * make semaphores SMP safe -+ * 1998-11-19 Implemented schedule_timeout() and related stuff -+ * by Andrea Arcangeli -+ * 2002-01-04 New ultra-scalable O(1) scheduler by Ingo Molnar: -+ * hybrid priority-list and round-robin design with -+ * an array-switch method of distributing timeslices -+ * and per-CPU runqueues. Cleanups and useful suggestions -+ * by Davide Libenzi, preemptible kernel bits by Robert Love. -+ * 2003-09-03 Interactivity tuning by Con Kolivas. -+ * 2004-04-02 Scheduler domains code by Nick Piggin -+ * 2007-04-15 Work begun on replacing all interactivity tuning with a -+ * fair scheduling design by Con Kolivas. -+ * 2007-05-05 Load balancing (smp-nice) and other improvements -+ * by Peter Williams -+ * 2007-05-06 Interactivity improvements to CFS by Mike Galbraith -+ * 2007-07-01 Group scheduling enhancements by Srivatsa Vaddagiri -+ * 2007-11-29 RT balancing improvements by Steven Rostedt, Gregory Haskins, -+ * Thomas Gleixner, Mike Kravetz -+ * 2009-08-13 Brainfuck deadline scheduling policy by Con Kolivas deletes -+ * a whole lot of those previous things. -+ * 2016-10-01 Multiple Queue Skiplist Scheduler scalable evolution of BFS -+ * scheduler by Con Kolivas. 
-+ * 2019-08-31 LLC bits by Eduards Bezverhijs -+ */ -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+ -+#include "../workqueue_internal.h" -+#include "../smpboot.h" -+ -+#define CREATE_TRACE_POINTS -+#include -+ -+#include "MuQSS.h" -+ -+#define rt_prio(prio) unlikely((prio) < MAX_RT_PRIO) -+#define rt_task(p) rt_prio((p)->prio) -+#define batch_task(p) (unlikely((p)->policy == SCHED_BATCH)) -+#define is_rt_policy(policy) ((policy) == SCHED_FIFO || \ -+ (policy) == SCHED_RR) -+#define has_rt_policy(p) unlikely(is_rt_policy((p)->policy)) -+ -+#define is_idle_policy(policy) ((policy) == SCHED_IDLEPRIO) -+#define idleprio_task(p) unlikely(is_idle_policy((p)->policy)) -+#define task_running_idle(p) unlikely((p)->prio == IDLE_PRIO) -+ -+#define is_iso_policy(policy) ((policy) == SCHED_ISO) -+#define iso_task(p) unlikely(is_iso_policy((p)->policy)) -+#define task_running_iso(p) unlikely((p)->prio == ISO_PRIO) -+ -+#define rq_idle(rq) ((rq)->rq_prio == PRIO_LIMIT) -+ -+#define ISO_PERIOD (5 * HZ) -+ -+#define STOP_PRIO (MAX_RT_PRIO - 1) -+ -+/* -+ * Some helpers for converting to/from various scales. Use shifts to get -+ * approximate multiples of ten for less overhead. -+ */ -+#define APPROX_NS_PS (1073741824) /* Approximate ns per second */ -+#define JIFFIES_TO_NS(TIME) ((TIME) * (APPROX_NS_PS / HZ)) -+#define JIFFY_NS (APPROX_NS_PS / HZ) -+#define JIFFY_US (1048576 / HZ) -+#define NS_TO_JIFFIES(TIME) ((TIME) / JIFFY_NS) -+#define HALF_JIFFY_NS (APPROX_NS_PS / HZ / 2) -+#define HALF_JIFFY_US (1048576 / HZ / 2) -+#define MS_TO_NS(TIME) ((TIME) << 20) -+#define MS_TO_US(TIME) ((TIME) << 10) -+#define NS_TO_MS(TIME) ((TIME) >> 20) -+#define NS_TO_US(TIME) ((TIME) >> 10) -+#define US_TO_NS(TIME) ((TIME) << 10) -+#define TICK_APPROX_NS ((APPROX_NS_PS+HZ/2)/HZ) -+ -+#define RESCHED_US (100) /* Reschedule if less than this many μs left */ -+ -+void print_scheduler_version(void) -+{ -+ printk(KERN_INFO "MuQSS CPU scheduler v0.196 by Con Kolivas.\n"); -+} -+ -+/* Define RQ share levels */ -+#define RQSHARE_NONE 0 -+#define RQSHARE_SMT 1 -+#define RQSHARE_MC 2 -+#define RQSHARE_MC_LLC 3 -+#define RQSHARE_SMP 4 -+#define RQSHARE_ALL 5 -+ -+/* Define locality levels */ -+#define LOCALITY_SAME 0 -+#define LOCALITY_SMT 1 -+#define LOCALITY_MC_LLC 2 -+#define LOCALITY_MC 3 -+#define LOCALITY_SMP 4 -+#define LOCALITY_DISTANT 5 -+ -+/* -+ * This determines what level of runqueue sharing will be done and is -+ * configurable at boot time with the bootparam rqshare = -+ */ -+static int rqshare __read_mostly = CONFIG_SHARERQ; /* Default RQSHARE_MC */ -+ -+static int __init set_rqshare(char *str) -+{ -+ if (!strncmp(str, "none", 4)) { -+ rqshare = RQSHARE_NONE; -+ return 0; -+ } -+ if (!strncmp(str, "smt", 3)) { -+ rqshare = RQSHARE_SMT; -+ return 0; -+ } -+ if (!strncmp(str, "mc", 2)) { -+ rqshare = RQSHARE_MC; -+ return 0; -+ } -+ if (!strncmp(str, "llc", 3)) { -+ rqshare = RQSHARE_MC_LLC; -+ return 0; -+ } -+ if (!strncmp(str, "smp", 3)) { -+ rqshare = RQSHARE_SMP; -+ return 0; -+ } -+ if (!strncmp(str, "all", 3)) { -+ rqshare = RQSHARE_ALL; -+ return 0; -+ } -+ return 1; -+} -+__setup("rqshare=", set_rqshare); -+ -+/* -+ * This is the time all tasks within the same priority round robin. -+ * Value is in ms and set to a minimum of 6ms. -+ * Tunable via /proc interface. 
-+ */ -+int rr_interval __read_mostly = 6; -+ -+/* -+ * Tunable to choose whether to prioritise latency or throughput, simple -+ * binary yes or no -+ */ -+int sched_interactive __read_mostly = 1; -+ -+/* -+ * sched_iso_cpu - sysctl which determines the cpu percentage SCHED_ISO tasks -+ * are allowed to run five seconds as real time tasks. This is the total over -+ * all online cpus. -+ */ -+int sched_iso_cpu __read_mostly = 70; -+ -+/* -+ * sched_yield_type - Choose what sort of yield sched_yield will perform. -+ * 0: No yield. -+ * 1: Yield only to better priority/deadline tasks. (default) -+ * 2: Expire timeslice and recalculate deadline. -+ */ -+int sched_yield_type __read_mostly = 1; -+ -+/* -+ * The relative length of deadline for each priority(nice) level. -+ */ -+static int prio_ratios[NICE_WIDTH] __read_mostly; -+ -+ -+/* -+ * The quota handed out to tasks of all priority levels when refilling their -+ * time_slice. -+ */ -+static inline int timeslice(void) -+{ -+ return MS_TO_US(rr_interval); -+} -+ -+DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); -+ -+#ifdef CONFIG_SMP -+/* -+ * Total number of runqueues. Equals number of CPUs when there is no runqueue -+ * sharing but is usually less with SMT/MC sharing of runqueues. -+ */ -+static int total_runqueues __read_mostly = 1; -+ -+static cpumask_t cpu_idle_map ____cacheline_aligned_in_smp; -+ -+struct rq *cpu_rq(int cpu) -+{ -+ return &per_cpu(runqueues, (cpu)); -+} -+#define cpu_curr(cpu) (cpu_rq(cpu)->curr) -+ -+/* -+ * For asym packing, by default the lower numbered cpu has higher priority. -+ */ -+int __weak arch_asym_cpu_priority(int cpu) -+{ -+ return -cpu; -+} -+ -+int __weak arch_sd_sibling_asym_packing(void) -+{ -+ return 0*SD_ASYM_PACKING; -+} -+ -+#ifdef CONFIG_SCHED_SMT -+DEFINE_STATIC_KEY_FALSE(sched_smt_present); -+EXPORT_SYMBOL_GPL(sched_smt_present); -+#endif -+ -+#else -+struct rq *uprq; -+#endif /* CONFIG_SMP */ -+ -+#include "stats.h" -+ -+/* -+ * All common locking functions performed on rq->lock. rq->clock is local to -+ * the CPU accessing it so it can be modified just with interrupts disabled -+ * when we're not updating niffies. -+ * Looking up task_rq must be done under rq->lock to be safe. -+ */ -+ -+/* -+ * RQ-clock updating methods: -+ */ -+ -+#ifdef HAVE_SCHED_AVG_IRQ -+static void update_irq_load_avg(struct rq *rq, long delta); -+#else -+static inline void update_irq_load_avg(struct rq *rq, long delta) {} -+#endif -+ -+static void update_rq_clock_task(struct rq *rq, s64 delta) -+{ -+/* -+ * In theory, the compile should just see 0 here, and optimize out the call -+ * to sched_rt_avg_update. But I don't trust it... -+ */ -+ s64 __maybe_unused steal = 0, irq_delta = 0; -+#ifdef CONFIG_IRQ_TIME_ACCOUNTING -+ irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time; -+ -+ /* -+ * Since irq_time is only updated on {soft,}irq_exit, we might run into -+ * this case when a previous update_rq_clock() happened inside a -+ * {soft,}irq region. -+ * -+ * When this happens, we stop ->clock_task and only update the -+ * prev_irq_time stamp to account for the part that fit, so that a next -+ * update will consume the rest. This ensures ->clock_task is -+ * monotonic. -+ * -+ * It does however cause some slight miss-attribution of {soft,}irq -+ * time, a more accurate solution would be to update the irq_time using -+ * the current rq->clock timestamp, except that would require using -+ * atomic ops. 
-+ */ -+ if (irq_delta > delta) -+ irq_delta = delta; -+ -+ rq->prev_irq_time += irq_delta; -+ delta -= irq_delta; -+#endif -+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING -+ if (static_key_false((¶virt_steal_rq_enabled))) { -+ steal = paravirt_steal_clock(cpu_of(rq)); -+ steal -= rq->prev_steal_time_rq; -+ -+ if (unlikely(steal > delta)) -+ steal = delta; -+ -+ rq->prev_steal_time_rq += steal; -+ delta -= steal; -+ } -+#endif -+ rq->clock_task += delta; -+ -+#ifdef CONFIG_HAVE_SCHED_AVG_IRQ -+ if (irq_delta + steal) -+ update_irq_load_avg(rq, irq_delta + steal); -+#endif -+} -+ -+static inline void update_rq_clock(struct rq *rq) -+{ -+ s64 delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; -+ -+ if (unlikely(delta < 0)) -+ return; -+ rq->clock += delta; -+ update_rq_clock_task(rq, delta); -+} -+ -+/* -+ * Niffies are a globally increasing nanosecond counter. They're only used by -+ * update_load_avg and time_slice_expired, however deadlines are based on them -+ * across CPUs. Update them whenever we will call one of those functions, and -+ * synchronise them across CPUs whenever we hold both runqueue locks. -+ */ -+static inline void update_clocks(struct rq *rq) -+{ -+ s64 ndiff, minndiff; -+ long jdiff; -+ -+ update_rq_clock(rq); -+ ndiff = rq->clock - rq->old_clock; -+ rq->old_clock = rq->clock; -+ jdiff = jiffies - rq->last_jiffy; -+ -+ /* Subtract any niffies added by balancing with other rqs */ -+ ndiff -= rq->niffies - rq->last_niffy; -+ minndiff = JIFFIES_TO_NS(jdiff) - rq->niffies + rq->last_jiffy_niffies; -+ if (minndiff < 0) -+ minndiff = 0; -+ ndiff = max(ndiff, minndiff); -+ rq->niffies += ndiff; -+ rq->last_niffy = rq->niffies; -+ if (jdiff) { -+ rq->last_jiffy += jdiff; -+ rq->last_jiffy_niffies = rq->niffies; -+ } -+} -+ -+/* -+ * Any time we have two runqueues locked we use that as an opportunity to -+ * synchronise niffies to the highest value as idle ticks may have artificially -+ * kept niffies low on one CPU and the truth can only be later. -+ */ -+static inline void synchronise_niffies(struct rq *rq1, struct rq *rq2) -+{ -+ if (rq1->niffies > rq2->niffies) -+ rq2->niffies = rq1->niffies; -+ else -+ rq1->niffies = rq2->niffies; -+} -+ -+/* -+ * double_rq_lock - safely lock two runqueues -+ * -+ * Note this does not disable interrupts like task_rq_lock, -+ * you need to do so manually before calling. -+ */ -+ -+/* For when we know rq1 != rq2 */ -+static inline void __double_rq_lock(struct rq *rq1, struct rq *rq2) -+ __acquires(rq1->lock) -+ __acquires(rq2->lock) -+{ -+ if (rq1 < rq2) { -+ raw_spin_lock(rq1->lock); -+ raw_spin_lock_nested(rq2->lock, SINGLE_DEPTH_NESTING); -+ } else { -+ raw_spin_lock(rq2->lock); -+ raw_spin_lock_nested(rq1->lock, SINGLE_DEPTH_NESTING); -+ } -+} -+ -+static inline void double_rq_lock(struct rq *rq1, struct rq *rq2) -+ __acquires(rq1->lock) -+ __acquires(rq2->lock) -+{ -+ BUG_ON(!irqs_disabled()); -+ if (rq1->lock == rq2->lock) { -+ raw_spin_lock(rq1->lock); -+ __acquire(rq2->lock); /* Fake it out ;) */ -+ } else -+ __double_rq_lock(rq1, rq2); -+ synchronise_niffies(rq1, rq2); -+} -+ -+/* -+ * double_rq_unlock - safely unlock two runqueues -+ * -+ * Note this does not restore interrupts like task_rq_unlock, -+ * you need to do so manually after calling. 
-+ */ -+static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2) -+ __releases(rq1->lock) -+ __releases(rq2->lock) -+{ -+ raw_spin_unlock(rq1->lock); -+ if (rq1->lock != rq2->lock) -+ raw_spin_unlock(rq2->lock); -+ else -+ __release(rq2->lock); -+} -+ -+static inline void lock_all_rqs(void) -+{ -+ int cpu; -+ -+ preempt_disable(); -+ for_each_possible_cpu(cpu) { -+ struct rq *rq = cpu_rq(cpu); -+ -+ do_raw_spin_lock(rq->lock); -+ } -+} -+ -+static inline void unlock_all_rqs(void) -+{ -+ int cpu; -+ -+ for_each_possible_cpu(cpu) { -+ struct rq *rq = cpu_rq(cpu); -+ -+ do_raw_spin_unlock(rq->lock); -+ } -+ preempt_enable(); -+} -+ -+/* Specially nest trylock an rq */ -+static inline bool trylock_rq(struct rq *this_rq, struct rq *rq) -+{ -+ if (unlikely(!do_raw_spin_trylock(rq->lock))) -+ return false; -+ spin_acquire(&rq->lock->dep_map, SINGLE_DEPTH_NESTING, 1, _RET_IP_); -+ synchronise_niffies(this_rq, rq); -+ return true; -+} -+ -+/* Unlock a specially nested trylocked rq */ -+static inline void unlock_rq(struct rq *rq) -+{ -+ spin_release(&rq->lock->dep_map, 1, _RET_IP_); -+ do_raw_spin_unlock(rq->lock); -+} -+ -+/* -+ * cmpxchg based fetch_or, macro so it works for different integer types -+ */ -+#define fetch_or(ptr, mask) \ -+ ({ \ -+ typeof(ptr) _ptr = (ptr); \ -+ typeof(mask) _mask = (mask); \ -+ typeof(*_ptr) _old, _val = *_ptr; \ -+ \ -+ for (;;) { \ -+ _old = cmpxchg(_ptr, _val, _val | _mask); \ -+ if (_old == _val) \ -+ break; \ -+ _val = _old; \ -+ } \ -+ _old; \ -+}) -+ -+#if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG) -+/* -+ * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG, -+ * this avoids any races wrt polling state changes and thereby avoids -+ * spurious IPIs. -+ */ -+static bool set_nr_and_not_polling(struct task_struct *p) -+{ -+ struct thread_info *ti = task_thread_info(p); -+ return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG); -+} -+ -+/* -+ * Atomically set TIF_NEED_RESCHED if TIF_POLLING_NRFLAG is set. -+ * -+ * If this returns true, then the idle task promises to call -+ * sched_ttwu_pending() and reschedule soon. -+ */ -+static bool set_nr_if_polling(struct task_struct *p) -+{ -+ struct thread_info *ti = task_thread_info(p); -+ typeof(ti->flags) old, val = READ_ONCE(ti->flags); -+ -+ for (;;) { -+ if (!(val & _TIF_POLLING_NRFLAG)) -+ return false; -+ if (val & _TIF_NEED_RESCHED) -+ return true; -+ old = cmpxchg(&ti->flags, val, val | _TIF_NEED_RESCHED); -+ if (old == val) -+ break; -+ val = old; -+ } -+ return true; -+} -+ -+#else -+static bool set_nr_and_not_polling(struct task_struct *p) -+{ -+ set_tsk_need_resched(p); -+ return true; -+} -+ -+#ifdef CONFIG_SMP -+static bool set_nr_if_polling(struct task_struct *p) -+{ -+ return false; -+} -+#endif -+#endif -+ -+static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task) -+{ -+ struct wake_q_node *node = &task->wake_q; -+ -+ /* -+ * Atomically grab the task, if ->wake_q is !nil already it means -+ * its already queued (either by us or someone else) and will get the -+ * wakeup due to that. -+ * -+ * In order to ensure that a pending wakeup will observe our pending -+ * state, even in the failed case, an explicit smp_mb() must be used. -+ */ -+ smp_mb__before_atomic(); -+ if (unlikely(cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL))) -+ return false; -+ -+ /* -+ * The head is context local, there can be no concurrency. 
-+ */ -+ *head->lastp = node; -+ head->lastp = &node->next; -+ return true; -+} -+ -+/** -+ * wake_q_add() - queue a wakeup for 'later' waking. -+ * @head: the wake_q_head to add @task to -+ * @task: the task to queue for 'later' wakeup -+ * -+ * Queue a task for later wakeup, most likely by the wake_up_q() call in the -+ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come -+ * instantly. -+ * -+ * This function must be used as-if it were wake_up_process(); IOW the task -+ * must be ready to be woken at this location. -+ */ -+void wake_q_add(struct wake_q_head *head, struct task_struct *task) -+{ -+ if (__wake_q_add(head, task)) -+ get_task_struct(task); -+} -+ -+/** -+ * wake_q_add_safe() - safely queue a wakeup for 'later' waking. -+ * @head: the wake_q_head to add @task to -+ * @task: the task to queue for 'later' wakeup -+ * -+ * Queue a task for later wakeup, most likely by the wake_up_q() call in the -+ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come -+ * instantly. -+ * -+ * This function must be used as-if it were wake_up_process(); IOW the task -+ * must be ready to be woken at this location. -+ * -+ * This function is essentially a task-safe equivalent to wake_q_add(). Callers -+ * that already hold reference to @task can call the 'safe' version and trust -+ * wake_q to do the right thing depending whether or not the @task is already -+ * queued for wakeup. -+ */ -+void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task) -+{ -+ if (!__wake_q_add(head, task)) -+ put_task_struct(task); -+} -+ -+void wake_up_q(struct wake_q_head *head) -+{ -+ struct wake_q_node *node = head->first; -+ -+ while (node != WAKE_Q_TAIL) { -+ struct task_struct *task; -+ -+ task = container_of(node, struct task_struct, wake_q); -+ BUG_ON(!task); -+ /* Task can safely be re-inserted now */ -+ node = node->next; -+ task->wake_q.next = NULL; -+ -+ /* -+ * wake_up_process() executes a full barrier, which pairs with -+ * the queueing in wake_q_add() so as not to miss wakeups. -+ */ -+ wake_up_process(task); -+ put_task_struct(task); -+ } -+} -+ -+static inline void smp_sched_reschedule(int cpu) -+{ -+ if (likely(cpu_online(cpu))) -+ smp_send_reschedule(cpu); -+} -+ -+/* -+ * resched_task - mark a task 'to be rescheduled now'. -+ * -+ * On UP this means the setting of the need_resched flag, on SMP it -+ * might also involve a cross-CPU call to trigger the scheduler on -+ * the target CPU. -+ */ -+void resched_task(struct task_struct *p) -+{ -+ int cpu; -+#ifdef CONFIG_LOCKDEP -+ /* Kernel threads call this when creating workqueues while still -+ * inactive from __kthread_bind_mask, holding only the pi_lock */ -+ if (!(p->flags & PF_KTHREAD)) { -+ struct rq *rq = task_rq(p); -+ -+ lockdep_assert_held(rq->lock); -+ } -+#endif -+ if (test_tsk_need_resched(p)) -+ return; -+ -+ cpu = task_cpu(p); -+ if (cpu == smp_processor_id()) { -+ set_tsk_need_resched(p); -+ set_preempt_need_resched(); -+ return; -+ } -+ -+ if (set_nr_and_not_polling(p)) -+ smp_sched_reschedule(cpu); -+ else -+ trace_sched_wake_idle_without_ipi(cpu); -+} -+ -+/* -+ * A task that is not running or queued will not have a node set. -+ * A task that is queued but not running will have a node set. -+ * A task that is currently running will have ->on_cpu set but no node set. 
-+ */ -+static inline bool task_queued(struct task_struct *p) -+{ -+ return !skiplist_node_empty(&p->node); -+} -+ -+static void enqueue_task(struct rq *rq, struct task_struct *p, int flags); -+static inline void resched_if_idle(struct rq *rq); -+ -+/* Dodgy workaround till we figure out where the softirqs are going */ -+static inline void do_pending_softirq(struct rq *rq, struct task_struct *next) -+{ -+ if (unlikely(next == rq->idle && local_softirq_pending() && !in_interrupt())) -+ do_softirq_own_stack(); -+} -+ -+static inline bool deadline_before(u64 deadline, u64 time) -+{ -+ return (deadline < time); -+} -+ -+/* -+ * Deadline is "now" in niffies + (offset by priority). Setting the deadline -+ * is the key to everything. It distributes cpu fairly amongst tasks of the -+ * same nice value, it proportions cpu according to nice level, it means the -+ * task that last woke up the longest ago has the earliest deadline, thus -+ * ensuring that interactive tasks get low latency on wake up. The CPU -+ * proportion works out to the square of the virtual deadline difference, so -+ * this equation will give nice 19 3% CPU compared to nice 0. -+ */ -+static inline u64 prio_deadline_diff(int user_prio) -+{ -+ return (prio_ratios[user_prio] * rr_interval * (MS_TO_NS(1) / 128)); -+} -+ -+static inline u64 task_deadline_diff(struct task_struct *p) -+{ -+ return prio_deadline_diff(TASK_USER_PRIO(p)); -+} -+ -+static inline u64 static_deadline_diff(int static_prio) -+{ -+ return prio_deadline_diff(USER_PRIO(static_prio)); -+} -+ -+static inline int longest_deadline_diff(void) -+{ -+ return prio_deadline_diff(39); -+} -+ -+static inline int ms_longest_deadline_diff(void) -+{ -+ return NS_TO_MS(longest_deadline_diff()); -+} -+ -+static inline bool rq_local(struct rq *rq); -+ -+#ifndef SCHED_CAPACITY_SCALE -+#define SCHED_CAPACITY_SCALE 1024 -+#endif -+ -+static inline int rq_load(struct rq *rq) -+{ -+ return rq->nr_running; -+} -+ -+/* -+ * Update the load average for feeding into cpu frequency governors. Use a -+ * rough estimate of a rolling average with ~ time constant of 32ms. -+ * 80/128 ~ 0.63. * 80 / 32768 / 128 == * 5 / 262144 -+ * Make sure a call to update_clocks has been made before calling this to get -+ * an updated rq->niffies. -+ */ -+static void update_load_avg(struct rq *rq, unsigned int flags) -+{ -+ long us_interval, load; -+ unsigned long curload; -+ -+ us_interval = NS_TO_US(rq->niffies - rq->load_update); -+ if (unlikely(us_interval <= 0)) -+ return; -+ -+ curload = rq_load(rq); -+ load = rq->load_avg - (rq->load_avg * us_interval * 5 / 262144); -+ if (unlikely(load < 0)) -+ load = 0; -+ load += curload * curload * SCHED_CAPACITY_SCALE * us_interval * 5 / 262144; -+ rq->load_avg = load; -+ -+ rq->load_update = rq->niffies; -+ update_irq_load_avg(rq, 0); -+ if (likely(rq_local(rq))) -+ cpufreq_trigger(rq, flags); -+} -+ -+#ifdef HAVE_SCHED_AVG_IRQ -+/* -+ * IRQ variant of update_load_avg below. delta is actually time in nanoseconds -+ * here so we scale curload to how long it's been since the last update. 
-+ */ -+static void update_irq_load_avg(struct rq *rq, long delta) -+{ -+ long us_interval, load; -+ unsigned long curload; -+ -+ us_interval = NS_TO_US(rq->niffies - rq->irq_load_update); -+ if (unlikely(us_interval <= 0)) -+ return; -+ -+ curload = NS_TO_US(delta) / us_interval; -+ load = rq->irq_load_avg - (rq->irq_load_avg * us_interval * 5 / 262144); -+ if (unlikely(load < 0)) -+ load = 0; -+ load += curload * curload * SCHED_CAPACITY_SCALE * us_interval * 5 / 262144; -+ rq->irq_load_avg = load; -+ -+ rq->irq_load_update = rq->niffies; -+} -+#endif -+ -+/* -+ * Removing from the runqueue. Enter with rq locked. Deleting a task -+ * from the skip list is done via the stored node reference in the task struct -+ * and does not require a full look up. Thus it occurs in O(k) time where k -+ * is the "level" of the list the task was stored at - usually < 4, max 8. -+ */ -+static void dequeue_task(struct rq *rq, struct task_struct *p, int flags) -+{ -+ skiplist_delete(rq->sl, &p->node); -+ rq->best_key = rq->node->next[0]->key; -+ update_clocks(rq); -+ -+ if (!(flags & DEQUEUE_SAVE)) { -+ sched_info_dequeued(rq, p); -+ psi_dequeue(p, flags & DEQUEUE_SLEEP); -+ } -+ rq->nr_running--; -+ if (rt_task(p)) -+ rq->rt_nr_running--; -+ update_load_avg(rq, flags); -+} -+ -+#ifdef CONFIG_PREEMPT_RCU -+static bool rcu_read_critical(struct task_struct *p) -+{ -+ return p->rcu_read_unlock_special.b.blocked; -+} -+#else /* CONFIG_PREEMPT_RCU */ -+#define rcu_read_critical(p) (false) -+#endif /* CONFIG_PREEMPT_RCU */ -+ -+/* -+ * To determine if it's safe for a task of SCHED_IDLEPRIO to actually run as -+ * an idle task, we ensure none of the following conditions are met. -+ */ -+static bool idleprio_suitable(struct task_struct *p) -+{ -+ return (!(task_contributes_to_load(p)) && !(p->flags & (PF_EXITING)) && -+ !signal_pending(p) && !rcu_read_critical(p) && !freezing(p)); -+} -+ -+/* -+ * To determine if a task of SCHED_ISO can run in pseudo-realtime, we check -+ * that the iso_refractory flag is not set. -+ */ -+static inline bool isoprio_suitable(struct rq *rq) -+{ -+ return !rq->iso_refractory; -+} -+ -+/* -+ * Adding to the runqueue. Enter with rq locked. -+ */ -+static void enqueue_task(struct rq *rq, struct task_struct *p, int flags) -+{ -+ unsigned int randseed, cflags = 0; -+ u64 sl_id; -+ -+ if (!rt_task(p)) { -+ /* Check it hasn't gotten rt from PI */ -+ if ((idleprio_task(p) && idleprio_suitable(p)) || -+ (iso_task(p) && isoprio_suitable(rq))) -+ p->prio = p->normal_prio; -+ else -+ p->prio = NORMAL_PRIO; -+ } else -+ rq->rt_nr_running++; -+ /* -+ * The sl_id key passed to the skiplist generates a sorted list. -+ * Realtime and sched iso tasks run FIFO so they only need be sorted -+ * according to priority. The skiplist will put tasks of the same -+ * key inserted later in FIFO order. Tasks of sched normal, batch -+ * and idleprio are sorted according to their deadlines. Idleprio -+ * tasks are offset by an impossibly large deadline value ensuring -+ * they get sorted into last positions, but still according to their -+ * own deadlines. This creates a "landscape" of skiplists running -+ * from priority 0 realtime in first place to the lowest priority -+ * idleprio tasks last. Skiplist insertion is an O(log n) process. 
-+ */ -+ if (p->prio <= ISO_PRIO) { -+ sl_id = p->prio; -+ } else { -+ sl_id = p->deadline; -+ if (idleprio_task(p)) { -+ if (p->prio == IDLE_PRIO) -+ sl_id |= 0xF000000000000000; -+ else -+ sl_id += longest_deadline_diff(); -+ } -+ } -+ /* -+ * Some architectures don't have better than microsecond resolution -+ * so mask out ~microseconds as the random seed for skiplist insertion. -+ */ -+ update_clocks(rq); -+ if (!(flags & ENQUEUE_RESTORE)) { -+ sched_info_queued(rq, p); -+ psi_enqueue(p, flags & ENQUEUE_WAKEUP); -+ } -+ -+ randseed = (rq->niffies >> 10) & 0xFFFFFFFF; -+ skiplist_insert(rq->sl, &p->node, sl_id, p, randseed); -+ rq->best_key = rq->node->next[0]->key; -+ if (p->in_iowait) -+ cflags |= SCHED_CPUFREQ_IOWAIT; -+ rq->nr_running++; -+ update_load_avg(rq, cflags); -+} -+ -+/* -+ * Returns the relative length of deadline all compared to the shortest -+ * deadline which is that of nice -20. -+ */ -+static inline int task_prio_ratio(struct task_struct *p) -+{ -+ return prio_ratios[TASK_USER_PRIO(p)]; -+} -+ -+/* -+ * task_timeslice - all tasks of all priorities get the exact same timeslice -+ * length. CPU distribution is handled by giving different deadlines to -+ * tasks of different priorities. Use 128 as the base value for fast shifts. -+ */ -+static inline int task_timeslice(struct task_struct *p) -+{ -+ return (rr_interval * task_prio_ratio(p) / 128); -+} -+ -+#ifdef CONFIG_SMP -+/* Entered with rq locked */ -+static inline void resched_if_idle(struct rq *rq) -+{ -+ if (rq_idle(rq)) -+ resched_task(rq->curr); -+} -+ -+static inline bool rq_local(struct rq *rq) -+{ -+ return (rq->cpu == smp_processor_id()); -+} -+#ifdef CONFIG_SMT_NICE -+static const cpumask_t *thread_cpumask(int cpu); -+ -+/* Find the best real time priority running on any SMT siblings of cpu and if -+ * none are running, the static priority of the best deadline task running. -+ * The lookups to the other runqueues is done lockless as the occasional wrong -+ * value would be harmless. */ -+static int best_smt_bias(struct rq *this_rq) -+{ -+ int other_cpu, best_bias = 0; -+ -+ for_each_cpu(other_cpu, &this_rq->thread_mask) { -+ struct rq *rq = cpu_rq(other_cpu); -+ -+ if (rq_idle(rq)) -+ continue; -+ if (unlikely(!rq->online)) -+ continue; -+ if (!rq->rq_mm) -+ continue; -+ if (likely(rq->rq_smt_bias > best_bias)) -+ best_bias = rq->rq_smt_bias; -+ } -+ return best_bias; -+} -+ -+static int task_prio_bias(struct task_struct *p) -+{ -+ if (rt_task(p)) -+ return 1 << 30; -+ else if (task_running_iso(p)) -+ return 1 << 29; -+ else if (task_running_idle(p)) -+ return 0; -+ return MAX_PRIO - p->static_prio; -+} -+ -+static bool smt_always_schedule(struct task_struct __maybe_unused *p, struct rq __maybe_unused *this_rq) -+{ -+ return true; -+} -+ -+static bool (*smt_schedule)(struct task_struct *p, struct rq *this_rq) = &smt_always_schedule; -+ -+/* We've already decided p can run on CPU, now test if it shouldn't for SMT -+ * nice reasons. 
*/ -+static bool smt_should_schedule(struct task_struct *p, struct rq *this_rq) -+{ -+ int best_bias, task_bias; -+ -+ /* Kernel threads always run */ -+ if (unlikely(!p->mm)) -+ return true; -+ if (rt_task(p)) -+ return true; -+ if (!idleprio_suitable(p)) -+ return true; -+ best_bias = best_smt_bias(this_rq); -+ /* The smt siblings are all idle or running IDLEPRIO */ -+ if (best_bias < 1) -+ return true; -+ task_bias = task_prio_bias(p); -+ if (task_bias < 1) -+ return false; -+ if (task_bias >= best_bias) -+ return true; -+ /* Dither 25% cpu of normal tasks regardless of nice difference */ -+ if (best_bias % 4 == 1) -+ return true; -+ /* Sorry, you lose */ -+ return false; -+} -+#else /* CONFIG_SMT_NICE */ -+#define smt_schedule(p, this_rq) (true) -+#endif /* CONFIG_SMT_NICE */ -+ -+static inline void atomic_set_cpu(int cpu, cpumask_t *cpumask) -+{ -+ set_bit(cpu, (volatile unsigned long *)cpumask); -+} -+ -+/* -+ * The cpu_idle_map stores a bitmap of all the CPUs currently idle to -+ * allow easy lookup of whether any suitable idle CPUs are available. -+ * It's cheaper to maintain a binary yes/no if there are any idle CPUs on the -+ * idle_cpus variable than to do a full bitmask check when we are busy. The -+ * bits are set atomically but read locklessly as occasional false positive / -+ * negative is harmless. -+ */ -+static inline void set_cpuidle_map(int cpu) -+{ -+ if (likely(cpu_online(cpu))) -+ atomic_set_cpu(cpu, &cpu_idle_map); -+} -+ -+static inline void atomic_clear_cpu(int cpu, cpumask_t *cpumask) -+{ -+ clear_bit(cpu, (volatile unsigned long *)cpumask); -+} -+ -+static inline void clear_cpuidle_map(int cpu) -+{ -+ atomic_clear_cpu(cpu, &cpu_idle_map); -+} -+ -+static bool suitable_idle_cpus(struct task_struct *p) -+{ -+ return (cpumask_intersects(p->cpus_ptr, &cpu_idle_map)); -+} -+ -+/* -+ * Resched current on rq. We don't know if rq is local to this CPU nor if it -+ * is locked so we do not use an intermediate variable for the task to avoid -+ * having it dereferenced. -+ */ -+static void resched_curr(struct rq *rq) -+{ -+ int cpu; -+ -+ if (test_tsk_need_resched(rq->curr)) -+ return; -+ -+ rq->preempt = rq->curr; -+ cpu = rq->cpu; -+ -+ /* We're doing this without holding the rq lock if it's not task_rq */ -+ -+ if (cpu == smp_processor_id()) { -+ set_tsk_need_resched(rq->curr); -+ set_preempt_need_resched(); -+ return; -+ } -+ -+ if (set_nr_and_not_polling(rq->curr)) -+ smp_sched_reschedule(cpu); -+ else -+ trace_sched_wake_idle_without_ipi(cpu); -+} -+ -+#define CPUIDLE_DIFF_THREAD (1) -+#define CPUIDLE_DIFF_CORE_LLC (2) -+#define CPUIDLE_DIFF_CORE (4) -+#define CPUIDLE_CACHE_BUSY (8) -+#define CPUIDLE_DIFF_CPU (16) -+#define CPUIDLE_THREAD_BUSY (32) -+#define CPUIDLE_DIFF_NODE (64) -+ -+/* -+ * The best idle CPU is chosen according to the CPUIDLE ranking above where the -+ * lowest value would give the most suitable CPU to schedule p onto next. The -+ * order works out to be the following: -+ * -+ * Same thread, idle or busy cache, idle or busy threads -+ * Other core, same cache, idle or busy cache, idle threads. -+ * Same node, other CPU, idle cache, idle threads. -+ * Same node, other CPU, busy cache, idle threads. -+ * Other core, same cache, busy threads. -+ * Same node, other CPU, busy threads. -+ * Other node, other CPU, idle cache, idle threads. -+ * Other node, other CPU, busy cache, idle threads. -+ * Other node, other CPU, busy threads. 
-+ */ -+static int best_mask_cpu(int best_cpu, struct rq *rq, cpumask_t *tmpmask) -+{ -+ int best_ranking = CPUIDLE_DIFF_NODE | CPUIDLE_THREAD_BUSY | -+ CPUIDLE_DIFF_CPU | CPUIDLE_CACHE_BUSY | CPUIDLE_DIFF_CORE | -+ CPUIDLE_DIFF_CORE_LLC | CPUIDLE_DIFF_THREAD; -+ int cpu_tmp; -+ -+ if (cpumask_test_cpu(best_cpu, tmpmask)) -+ goto out; -+ -+ for_each_cpu(cpu_tmp, tmpmask) { -+ int ranking, locality; -+ struct rq *tmp_rq; -+ -+ ranking = 0; -+ tmp_rq = cpu_rq(cpu_tmp); -+ -+ locality = rq->cpu_locality[cpu_tmp]; -+#ifdef CONFIG_NUMA -+ if (locality > LOCALITY_SMP) -+ ranking |= CPUIDLE_DIFF_NODE; -+ else -+#endif -+ if (locality > LOCALITY_MC) -+ ranking |= CPUIDLE_DIFF_CPU; -+#ifdef CONFIG_SCHED_MC -+ else if (locality == LOCALITY_MC_LLC) -+ ranking |= CPUIDLE_DIFF_CORE_LLC; -+ else if (locality == LOCALITY_MC) -+ ranking |= CPUIDLE_DIFF_CORE; -+ if (!(tmp_rq->cache_idle(tmp_rq))) -+ ranking |= CPUIDLE_CACHE_BUSY; -+#endif -+#ifdef CONFIG_SCHED_SMT -+ if (locality == LOCALITY_SMT) -+ ranking |= CPUIDLE_DIFF_THREAD; -+#endif -+ if (ranking < best_ranking -+#ifdef CONFIG_SCHED_SMT -+ || (ranking == best_ranking && (tmp_rq->siblings_idle(tmp_rq))) -+#endif -+ ) { -+ best_cpu = cpu_tmp; -+ best_ranking = ranking; -+ } -+ } -+out: -+ return best_cpu; -+} -+ -+bool cpus_share_cache(int this_cpu, int that_cpu) -+{ -+ struct rq *this_rq = cpu_rq(this_cpu); -+ -+ return (this_rq->cpu_locality[that_cpu] < LOCALITY_SMP); -+} -+ -+/* As per resched_curr but only will resched idle task */ -+static inline void resched_idle(struct rq *rq) -+{ -+ if (test_tsk_need_resched(rq->idle)) -+ return; -+ -+ rq->preempt = rq->idle; -+ -+ set_tsk_need_resched(rq->idle); -+ -+ if (rq_local(rq)) { -+ set_preempt_need_resched(); -+ return; -+ } -+ -+ smp_sched_reschedule(rq->cpu); -+} -+ -+static struct rq *resched_best_idle(struct task_struct *p, int cpu) -+{ -+ cpumask_t tmpmask; -+ struct rq *rq; -+ int best_cpu; -+ -+ cpumask_and(&tmpmask, p->cpus_ptr, &cpu_idle_map); -+ best_cpu = best_mask_cpu(cpu, task_rq(p), &tmpmask); -+ rq = cpu_rq(best_cpu); -+ if (!smt_schedule(p, rq)) -+ return NULL; -+ rq->preempt = p; -+ resched_idle(rq); -+ return rq; -+} -+ -+static inline void resched_suitable_idle(struct task_struct *p) -+{ -+ if (suitable_idle_cpus(p)) -+ resched_best_idle(p, task_cpu(p)); -+} -+ -+static inline struct rq *rq_order(struct rq *rq, int cpu) -+{ -+ return rq->rq_order[cpu]; -+} -+#else /* CONFIG_SMP */ -+static inline void set_cpuidle_map(int cpu) -+{ -+} -+ -+static inline void clear_cpuidle_map(int cpu) -+{ -+} -+ -+static inline bool suitable_idle_cpus(struct task_struct *p) -+{ -+ return uprq->curr == uprq->idle; -+} -+ -+static inline void resched_suitable_idle(struct task_struct *p) -+{ -+} -+ -+static inline void resched_curr(struct rq *rq) -+{ -+ resched_task(rq->curr); -+} -+ -+static inline void resched_if_idle(struct rq *rq) -+{ -+} -+ -+static inline bool rq_local(struct rq *rq) -+{ -+ return true; -+} -+ -+static inline struct rq *rq_order(struct rq *rq, int cpu) -+{ -+ return rq; -+} -+ -+static inline bool smt_schedule(struct task_struct *p, struct rq *rq) -+{ -+ return true; -+} -+#endif /* CONFIG_SMP */ -+ -+static inline int normal_prio(struct task_struct *p) -+{ -+ if (has_rt_policy(p)) -+ return MAX_RT_PRIO - 1 - p->rt_priority; -+ if (idleprio_task(p)) -+ return IDLE_PRIO; -+ if (iso_task(p)) -+ return ISO_PRIO; -+ return NORMAL_PRIO; -+} -+ -+/* -+ * Calculate the current priority, i.e. the priority -+ * taken into account by the scheduler. 
This value might -+ * be boosted by RT tasks as it will be RT if the task got -+ * RT-boosted. If not then it returns p->normal_prio. -+ */ -+static int effective_prio(struct task_struct *p) -+{ -+ p->normal_prio = normal_prio(p); -+ /* -+ * If we are RT tasks or we were boosted to RT priority, -+ * keep the priority unchanged. Otherwise, update priority -+ * to the normal priority: -+ */ -+ if (!rt_prio(p->prio)) -+ return p->normal_prio; -+ return p->prio; -+} -+ -+/* -+ * activate_task - move a task to the runqueue. Enter with rq locked. -+ */ -+static void activate_task(struct rq *rq, struct task_struct *p, int flags) -+{ -+ resched_if_idle(rq); -+ -+ /* -+ * Sleep time is in units of nanosecs, so shift by 20 to get a -+ * milliseconds-range estimation of the amount of time that the task -+ * spent sleeping: -+ */ -+ if (unlikely(prof_on == SLEEP_PROFILING)) { -+ if (p->state == TASK_UNINTERRUPTIBLE) -+ profile_hits(SLEEP_PROFILING, (void *)get_wchan(p), -+ (rq->niffies - p->last_ran) >> 20); -+ } -+ -+ p->prio = effective_prio(p); -+ if (task_contributes_to_load(p)) -+ rq->nr_uninterruptible--; -+ -+ enqueue_task(rq, p, flags); -+ p->on_rq = TASK_ON_RQ_QUEUED; -+} -+ -+/* -+ * deactivate_task - If it's running, it's not on the runqueue and we can just -+ * decrement the nr_running. Enter with rq locked. -+ */ -+static inline void deactivate_task(struct task_struct *p, struct rq *rq) -+{ -+ if (task_contributes_to_load(p)) -+ rq->nr_uninterruptible++; -+ -+ p->on_rq = 0; -+ sched_info_dequeued(rq, p); -+ /* deactivate_task is always DEQUEUE_SLEEP in muqss */ -+ psi_dequeue(p, DEQUEUE_SLEEP); -+} -+ -+#ifdef CONFIG_SMP -+void set_task_cpu(struct task_struct *p, unsigned int new_cpu) -+{ -+ struct rq *rq; -+ -+ if (task_cpu(p) == new_cpu) -+ return; -+ -+ /* Do NOT call set_task_cpu on a currently queued task as we will not -+ * be reliably holding the rq lock after changing CPU. */ -+ BUG_ON(task_queued(p)); -+ rq = task_rq(p); -+ -+#ifdef CONFIG_LOCKDEP -+ /* -+ * The caller should hold either p->pi_lock or rq->lock, when changing -+ * a task's CPU. ->pi_lock for waking tasks, rq->lock for runnable tasks. -+ * -+ * Furthermore, all task_rq users should acquire both locks, see -+ * task_rq_lock(). -+ */ -+ WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) || -+ lockdep_is_held(rq->lock))); -+#endif -+ -+ trace_sched_migrate_task(p, new_cpu); -+ rseq_migrate(p); -+ perf_event_task_migrate(p); -+ -+ /* -+ * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be -+ * successfully executed on another CPU. We must ensure that updates of -+ * per-task data have been completed by this moment. -+ */ -+ smp_wmb(); -+ -+ p->wake_cpu = new_cpu; -+ -+ if (task_running(rq, p)) { -+ /* -+ * We should only be calling this on a running task if we're -+ * holding rq lock. -+ */ -+ lockdep_assert_held(rq->lock); -+ -+ /* -+ * We can't change the task_thread_info CPU on a running task -+ * as p will still be protected by the rq lock of the CPU it -+ * is still running on so we only set the wake_cpu for it to be -+ * lazily updated once off the CPU. -+ */ -+ return; -+ } -+ -+#ifdef CONFIG_THREAD_INFO_IN_TASK -+ WRITE_ONCE(p->cpu, new_cpu); -+#else -+ WRITE_ONCE(task_thread_info(p)->cpu, new_cpu); -+#endif -+ /* We're no longer protecting p after this point since we're holding -+ * the wrong runqueue lock. */ -+} -+#endif /* CONFIG_SMP */ -+ -+/* -+ * Move a task off the runqueue and take it to a cpu for it will -+ * become the running task. 
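The "shift by 20" in the sleep-profiling path above works because 2^20 = 1048576 is within about 5% of 10^6, so dividing nanoseconds by it yields a milliseconds-range figure without a division. A quick standalone check (not kernel code):

```c
#include <stdio.h>

int main(void)
{
	unsigned long long slept_ns = 5000000000ULL;	/* task slept 5 seconds */

	printf("exact ms : %llu\n", slept_ns / 1000000ULL);	/* 5000 */
	printf("ns >> 20 : %llu\n", slept_ns >> 20);		/* 4768, close enough */
	return 0;
}
```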
-+ */ -+static inline void take_task(struct rq *rq, int cpu, struct task_struct *p) -+{ -+ struct rq *p_rq = task_rq(p); -+ -+ dequeue_task(p_rq, p, DEQUEUE_SAVE); -+ if (p_rq != rq) { -+ sched_info_dequeued(p_rq, p); -+ sched_info_queued(rq, p); -+ } -+ set_task_cpu(p, cpu); -+} -+ -+/* -+ * Returns a descheduling task to the runqueue unless it is being -+ * deactivated. -+ */ -+static inline void return_task(struct task_struct *p, struct rq *rq, -+ int cpu, bool deactivate) -+{ -+ if (deactivate) -+ deactivate_task(p, rq); -+ else { -+#ifdef CONFIG_SMP -+ /* -+ * set_task_cpu was called on the running task that doesn't -+ * want to deactivate so it has to be enqueued to a different -+ * CPU and we need its lock. Tag it to be moved with as the -+ * lock is dropped in finish_lock_switch. -+ */ -+ if (unlikely(p->wake_cpu != cpu)) -+ WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING); -+ else -+#endif -+ enqueue_task(rq, p, ENQUEUE_RESTORE); -+ } -+} -+ -+/* Enter with rq lock held. We know p is on the local cpu */ -+static inline void __set_tsk_resched(struct task_struct *p) -+{ -+ set_tsk_need_resched(p); -+ set_preempt_need_resched(); -+} -+ -+/** -+ * task_curr - is this task currently executing on a CPU? -+ * @p: the task in question. -+ * -+ * Return: 1 if the task is currently executing. 0 otherwise. -+ */ -+inline int task_curr(const struct task_struct *p) -+{ -+ return cpu_curr(task_cpu(p)) == p; -+} -+ -+#ifdef CONFIG_SMP -+/* -+ * wait_task_inactive - wait for a thread to unschedule. -+ * -+ * If @match_state is nonzero, it's the @p->state value just checked and -+ * not expected to change. If it changes, i.e. @p might have woken up, -+ * then return zero. When we succeed in waiting for @p to be off its CPU, -+ * we return a positive number (its total switch count). If a second call -+ * a short while later returns the same number, the caller can be sure that -+ * @p has remained unscheduled the whole time. -+ * -+ * The caller must ensure that the task *will* unschedule sometime soon, -+ * else this function might spin for a *long* time. This function can't -+ * be called with interrupts off, or it may introduce deadlock with -+ * smp_call_function() if an IPI is sent by the same process we are -+ * waiting to become inactive. -+ */ -+unsigned long wait_task_inactive(struct task_struct *p, long match_state) -+{ -+ int running, queued; -+ struct rq_flags rf; -+ unsigned long ncsw; -+ struct rq *rq; -+ -+ for (;;) { -+ rq = task_rq(p); -+ -+ /* -+ * If the task is actively running on another CPU -+ * still, just relax and busy-wait without holding -+ * any locks. -+ * -+ * NOTE! Since we don't hold any locks, it's not -+ * even sure that "rq" stays as the right runqueue! -+ * But we don't care, since this will return false -+ * if the runqueue has changed and p is actually now -+ * running somewhere else! -+ */ -+ while (task_running(rq, p)) { -+ if (match_state && unlikely(p->state != match_state)) -+ return 0; -+ cpu_relax(); -+ } -+ -+ /* -+ * Ok, time to look more closely! We need the rq -+ * lock now, to be *sure*. If we're wrong, we'll -+ * just go back and repeat. -+ */ -+ rq = task_rq_lock(p, &rf); -+ trace_sched_wait_task(p); -+ running = task_running(rq, p); -+ queued = task_on_rq_queued(p); -+ ncsw = 0; -+ if (!match_state || p->state == match_state) -+ ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ -+ task_rq_unlock(rq, p, &rf); -+ -+ /* -+ * If it changed from the expected state, bail out now. 
-+ */ -+ if (unlikely(!ncsw)) -+ break; -+ -+ /* -+ * Was it really running after all now that we -+ * checked with the proper locks actually held? -+ * -+ * Oops. Go back and try again.. -+ */ -+ if (unlikely(running)) { -+ cpu_relax(); -+ continue; -+ } -+ -+ /* -+ * It's not enough that it's not actively running, -+ * it must be off the runqueue _entirely_, and not -+ * preempted! -+ * -+ * So if it was still runnable (but just not actively -+ * running right now), it's preempted, and we should -+ * yield - it could be a while. -+ */ -+ if (unlikely(queued)) { -+ ktime_t to = NSEC_PER_SEC / HZ; -+ -+ set_current_state(TASK_UNINTERRUPTIBLE); -+ schedule_hrtimeout(&to, HRTIMER_MODE_REL); -+ continue; -+ } -+ -+ /* -+ * Ahh, all good. It wasn't running, and it wasn't -+ * runnable, which means that it will never become -+ * running in the future either. We're all done! -+ */ -+ break; -+ } -+ -+ return ncsw; -+} -+ -+/*** -+ * kick_process - kick a running thread to enter/exit the kernel -+ * @p: the to-be-kicked thread -+ * -+ * Cause a process which is running on another CPU to enter -+ * kernel-mode, without any delay. (to get signals handled.) -+ * -+ * NOTE: this function doesn't have to take the runqueue lock, -+ * because all it wants to ensure is that the remote task enters -+ * the kernel. If the IPI races and the task has been migrated -+ * to another CPU then no harm is done and the purpose has been -+ * achieved as well. -+ */ -+void kick_process(struct task_struct *p) -+{ -+ int cpu; -+ -+ preempt_disable(); -+ cpu = task_cpu(p); -+ if ((cpu != smp_processor_id()) && task_curr(p)) -+ smp_sched_reschedule(cpu); -+ preempt_enable(); -+} -+EXPORT_SYMBOL_GPL(kick_process); -+#endif -+ -+/* -+ * RT tasks preempt purely on priority. SCHED_NORMAL tasks preempt on the -+ * basis of earlier deadlines. SCHED_IDLEPRIO don't preempt anything else or -+ * between themselves, they cooperatively multitask. An idle rq scores as -+ * prio PRIO_LIMIT so it is always preempted. -+ */ -+static inline bool -+can_preempt(struct task_struct *p, int prio, u64 deadline) -+{ -+ /* Better static priority RT task or better policy preemption */ -+ if (p->prio < prio) -+ return true; -+ if (p->prio > prio) -+ return false; -+ if (p->policy == SCHED_BATCH) -+ return false; -+ /* SCHED_NORMAL and ISO will preempt based on deadline */ -+ if (!deadline_before(p->deadline, deadline)) -+ return false; -+ return true; -+} -+ -+#ifdef CONFIG_SMP -+ -+static inline bool is_per_cpu_kthread(struct task_struct *p) -+{ -+ if (!(p->flags & PF_KTHREAD)) -+ return false; -+ -+ if (p->nr_cpus_allowed != 1) -+ return false; -+ -+ return true; -+} -+ -+/* -+ * Per-CPU kthreads are allowed to run on !active && online CPUs, see -+ * __set_cpus_allowed_ptr(). -+ */ -+static inline bool is_cpu_allowed(struct task_struct *p, int cpu) -+{ -+ if (!cpumask_test_cpu(cpu, p->cpus_ptr)) -+ return false; -+ -+ if (is_per_cpu_kthread(p)) -+ return cpu_online(cpu); -+ -+ return cpu_active(cpu); -+} -+ -+/* -+ * Check to see if p can run on cpu, and if not, whether there are any online -+ * CPUs it can run on instead. This only happens with the hotplug threads that -+ * bring up the CPUs. 
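The `ncsw = p->nvcsw | LONG_MIN` line in wait_task_inactive() above tags the voluntary switch count with the sign bit so the function can return a nonzero cookie even for a task that has never switched, leaving 0 free to mean "the state changed, give up". A small standalone illustration:

```c
#include <limits.h>
#include <stdio.h>

int main(void)
{
	unsigned long nvcsw = 0;			/* task has never switched yet */
	unsigned long ncsw = nvcsw | LONG_MIN;		/* sets the MSB */

	/* Nonzero even though nvcsw is 0; equal cookies from two calls mean
	 * the task never rescheduled in between. */
	printf("ncsw = %#lx (nonzero: %d)\n", ncsw, ncsw != 0);
	return 0;
}
```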
-+ */ -+static inline bool sched_other_cpu(struct task_struct *p, int cpu) -+{ -+ if (likely(cpumask_test_cpu(cpu, p->cpus_ptr))) -+ return false; -+ if (p->nr_cpus_allowed == 1) { -+ cpumask_t valid_mask; -+ -+ cpumask_and(&valid_mask, p->cpus_ptr, cpu_online_mask); -+ if (unlikely(cpumask_empty(&valid_mask))) -+ return false; -+ } -+ return true; -+} -+ -+static inline bool needs_other_cpu(struct task_struct *p, int cpu) -+{ -+ if (cpumask_test_cpu(cpu, p->cpus_ptr)) -+ return false; -+ return true; -+} -+ -+#define cpu_online_map (*(cpumask_t *)cpu_online_mask) -+ -+static void try_preempt(struct task_struct *p, struct rq *this_rq) -+{ -+ int i, this_entries = rq_load(this_rq); -+ cpumask_t tmp; -+ -+ if (suitable_idle_cpus(p) && resched_best_idle(p, task_cpu(p))) -+ return; -+ -+ /* IDLEPRIO tasks never preempt anything but idle */ -+ if (p->policy == SCHED_IDLEPRIO) -+ return; -+ -+ cpumask_and(&tmp, &cpu_online_map, p->cpus_ptr); -+ -+ for (i = 0; i < num_online_cpus(); i++) { -+ struct rq *rq = this_rq->cpu_order[i]; -+ -+ if (!cpumask_test_cpu(rq->cpu, &tmp)) -+ continue; -+ -+ if (!sched_interactive && rq != this_rq && rq_load(rq) <= this_entries) -+ continue; -+ if (smt_schedule(p, rq) && can_preempt(p, rq->rq_prio, rq->rq_deadline)) { -+ /* We set rq->preempting lockless, it's a hint only */ -+ rq->preempting = p; -+ resched_curr(rq); -+ return; -+ } -+ } -+} -+ -+static int __set_cpus_allowed_ptr(struct task_struct *p, -+ const struct cpumask *new_mask, bool check); -+#else /* CONFIG_SMP */ -+static inline bool needs_other_cpu(struct task_struct *p, int cpu) -+{ -+ return false; -+} -+ -+static void try_preempt(struct task_struct *p, struct rq *this_rq) -+{ -+ if (p->policy == SCHED_IDLEPRIO) -+ return; -+ if (can_preempt(p, uprq->rq_prio, uprq->rq_deadline)) -+ resched_curr(uprq); -+} -+ -+static inline int __set_cpus_allowed_ptr(struct task_struct *p, -+ const struct cpumask *new_mask, bool check) -+{ -+ return set_cpus_allowed_ptr(p, new_mask); -+} -+#endif /* CONFIG_SMP */ -+ -+/* -+ * wake flags -+ */ -+#define WF_SYNC 0x01 /* waker goes to sleep after wakeup */ -+#define WF_FORK 0x02 /* child wakeup after fork */ -+#define WF_MIGRATED 0x04 /* internal use, task got migrated */ -+ -+static void -+ttwu_stat(struct task_struct *p, int cpu, int wake_flags) -+{ -+ struct rq *rq; -+ -+ if (!schedstat_enabled()) -+ return; -+ -+ rq = this_rq(); -+ -+#ifdef CONFIG_SMP -+ if (cpu == rq->cpu) { -+ __schedstat_inc(rq->ttwu_local); -+ } else { -+ struct sched_domain *sd; -+ -+ rcu_read_lock(); -+ for_each_domain(rq->cpu, sd) { -+ if (cpumask_test_cpu(cpu, sched_domain_span(sd))) { -+ __schedstat_inc(sd->ttwu_wake_remote); -+ break; -+ } -+ } -+ rcu_read_unlock(); -+ } -+ -+#endif /* CONFIG_SMP */ -+ -+ __schedstat_inc(rq->ttwu_count); -+} -+ -+/* -+ * Mark the task runnable and perform wakeup-preemption. -+ */ -+static void ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) -+{ -+ /* -+ * Sync wakeups (i.e. those types of wakeups where the waker -+ * has indicated that it will leave the CPU in short order) -+ * don't trigger a preemption if there are no idle cpus, -+ * instead waiting for current to deschedule. 
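try_preempt() above walks the runqueues in cache-locality order and uses can_preempt() to decide whether the woken task should displace what is currently running: a better (lower) priority always wins, equal priority falls back to the earlier virtual deadline, and SCHED_BATCH never preempts. A standalone sketch of that test with simplified fields (illustration only, not the patch code):

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct toy_task {
	int prio;		/* lower value is better */
	bool batch;		/* SCHED_BATCH never preempts */
	uint64_t deadline;	/* "niffies" at which the slice expires */
};

static bool toy_can_preempt(const struct toy_task *p, int rq_prio,
			    uint64_t rq_deadline)
{
	if (p->prio < rq_prio)
		return true;
	if (p->prio > rq_prio)
		return false;
	if (p->batch)
		return false;
	return p->deadline < rq_deadline;	/* deadline_before() */
}

int main(void)
{
	struct toy_task waker = { .prio = 120, .deadline = 1000 };

	printf("%d\n", toy_can_preempt(&waker, 120, 1500));	/* 1: earlier deadline */
	printf("%d\n", toy_can_preempt(&waker, 110, 1500));	/* 0: rq task has better prio */
	return 0;
}
```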
-+ */ -+ if (wake_flags & WF_SYNC) -+ resched_suitable_idle(p); -+ else -+ try_preempt(p, rq); -+ p->state = TASK_RUNNING; -+ trace_sched_wakeup(p); -+} -+ -+static void -+ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags) -+{ -+ int en_flags = ENQUEUE_WAKEUP; -+ -+ lockdep_assert_held(rq->lock); -+ -+#ifdef CONFIG_SMP -+ if (p->sched_contributes_to_load) -+ rq->nr_uninterruptible--; -+ -+ if (wake_flags & WF_MIGRATED) -+ en_flags |= ENQUEUE_MIGRATED; -+#endif -+ -+ activate_task(rq, p, en_flags); -+ ttwu_do_wakeup(rq, p, wake_flags); -+} -+ -+/* -+ * Called in case the task @p isn't fully descheduled from its runqueue, -+ * in this case we must do a remote wakeup. Its a 'light' wakeup though, -+ * since all we need to do is flip p->state to TASK_RUNNING, since -+ * the task is still ->on_rq. -+ */ -+static int ttwu_remote(struct task_struct *p, int wake_flags) -+{ -+ struct rq *rq; -+ int ret = 0; -+ -+ rq = __task_rq_lock(p, NULL); -+ if (likely(task_on_rq_queued(p))) { -+ ttwu_do_wakeup(rq, p, wake_flags); -+ ret = 1; -+ } -+ __task_rq_unlock(rq, NULL); -+ -+ return ret; -+} -+ -+#ifdef CONFIG_SMP -+void sched_ttwu_pending(void) -+{ -+ struct rq *rq = this_rq(); -+ struct llist_node *llist = llist_del_all(&rq->wake_list); -+ struct task_struct *p, *t; -+ struct rq_flags rf; -+ -+ if (!llist) -+ return; -+ -+ rq_lock_irqsave(rq, &rf); -+ -+ llist_for_each_entry_safe(p, t, llist, wake_entry) -+ ttwu_do_activate(rq, p, 0); -+ -+ rq_unlock_irqrestore(rq, &rf); -+} -+ -+void scheduler_ipi(void) -+{ -+ /* -+ * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting -+ * TIF_NEED_RESCHED remotely (for the first time) will also send -+ * this IPI. -+ */ -+ preempt_fold_need_resched(); -+ -+ if (llist_empty(&this_rq()->wake_list) && (!idle_cpu(smp_processor_id()) || need_resched())) -+ return; -+ -+ /* -+ * Not all reschedule IPI handlers call irq_enter/irq_exit, since -+ * traditionally all their work was done from the interrupt return -+ * path. Now that we actually do some work, we need to make sure -+ * we do call them. -+ * -+ * Some archs already do call them, luckily irq_enter/exit nest -+ * properly. -+ * -+ * Arguably we should visit all archs and update all handlers, -+ * however a fair share of IPIs are still resched only so this would -+ * somewhat pessimize the simple resched case. 
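sched_ttwu_pending() above drains the per-runqueue wake_list in one go with llist_del_all(); the producer side, ttwu_queue_remote(), follows below and pushes with llist_add(). The shape of that lock-free handoff, modelled with C11 atomics in plain userspace code (single-threaded here purely to show the push/detach pattern):

```c
#include <stdatomic.h>
#include <stdio.h>

struct wake_node {
	int task_id;
	struct wake_node *next;
};

static _Atomic(struct wake_node *) wake_list;

static void wake_list_add(struct wake_node *n)		/* remote waker */
{
	struct wake_node *old = atomic_load(&wake_list);

	do {
		n->next = old;
	} while (!atomic_compare_exchange_weak(&wake_list, &old, n));
}

static struct wake_node *wake_list_del_all(void)	/* owning CPU */
{
	return atomic_exchange(&wake_list, NULL);
}

int main(void)
{
	struct wake_node a = { .task_id = 1 }, b = { .task_id = 2 };

	wake_list_add(&a);
	wake_list_add(&b);
	for (struct wake_node *n = wake_list_del_all(); n; n = n->next)
		printf("activate task %d\n", n->task_id);	/* LIFO order */
	return 0;
}
```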
-+ */ -+ irq_enter(); -+ sched_ttwu_pending(); -+ irq_exit(); -+} -+ -+static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ if (llist_add(&p->wake_entry, &cpu_rq(cpu)->wake_list)) { -+ if (!set_nr_if_polling(rq->idle)) -+ smp_sched_reschedule(cpu); -+ else -+ trace_sched_wake_idle_without_ipi(cpu); -+ } -+} -+ -+void wake_up_if_idle(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ struct rq_flags rf; -+ -+ rcu_read_lock(); -+ -+ if (!is_idle_task(rcu_dereference(rq->curr))) -+ goto out; -+ -+ if (set_nr_if_polling(rq->idle)) { -+ trace_sched_wake_idle_without_ipi(cpu); -+ } else { -+ rq_lock_irqsave(rq, &rf); -+ if (likely(is_idle_task(rq->curr))) -+ smp_sched_reschedule(cpu); -+ /* Else cpu is not in idle, do nothing here */ -+ rq_unlock_irqrestore(rq, &rf); -+ } -+ -+out: -+ rcu_read_unlock(); -+} -+ -+static int valid_task_cpu(struct task_struct *p) -+{ -+ cpumask_t valid_mask; -+ -+ if (p->flags & PF_KTHREAD) -+ cpumask_and(&valid_mask, p->cpus_ptr, cpu_all_mask); -+ else -+ cpumask_and(&valid_mask, p->cpus_ptr, cpu_active_mask); -+ -+ if (unlikely(!cpumask_weight(&valid_mask))) { -+ /* We shouldn't be hitting this any more */ -+ printk(KERN_WARNING "SCHED: No cpumask for %s/%d weight %d\n", p->comm, -+ p->pid, cpumask_weight(p->cpus_ptr)); -+ return cpumask_any(p->cpus_ptr); -+ } -+ return cpumask_any(&valid_mask); -+} -+ -+/* -+ * For a task that's just being woken up we have a valuable balancing -+ * opportunity so choose the nearest cache most lightly loaded runqueue. -+ * Entered with rq locked and returns with the chosen runqueue locked. -+ */ -+static inline int select_best_cpu(struct task_struct *p) -+{ -+ unsigned int idlest = ~0U; -+ struct rq *rq = NULL; -+ int i; -+ -+ if (suitable_idle_cpus(p)) { -+ int cpu = task_cpu(p); -+ -+ if (unlikely(needs_other_cpu(p, cpu))) -+ cpu = valid_task_cpu(p); -+ rq = resched_best_idle(p, cpu); -+ if (likely(rq)) -+ return rq->cpu; -+ } -+ -+ for (i = 0; i < num_online_cpus(); i++) { -+ struct rq *other_rq = task_rq(p)->cpu_order[i]; -+ int entries; -+ -+ if (!other_rq->online) -+ continue; -+ if (needs_other_cpu(p, other_rq->cpu)) -+ continue; -+ entries = rq_load(other_rq); -+ if (entries >= idlest) -+ continue; -+ idlest = entries; -+ rq = other_rq; -+ } -+ if (unlikely(!rq)) -+ return task_cpu(p); -+ return rq->cpu; -+} -+#else /* CONFIG_SMP */ -+static int valid_task_cpu(struct task_struct *p) -+{ -+ return 0; -+} -+ -+static inline int select_best_cpu(struct task_struct *p) -+{ -+ return 0; -+} -+ -+static struct rq *resched_best_idle(struct task_struct *p, int cpu) -+{ -+ return NULL; -+} -+#endif /* CONFIG_SMP */ -+ -+static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+#if defined(CONFIG_SMP) -+ if (!cpus_share_cache(smp_processor_id(), cpu)) { -+ sched_clock_cpu(cpu); /* Sync clocks across CPUs */ -+ ttwu_queue_remote(p, cpu, wake_flags); -+ return; -+ } -+#endif -+ rq_lock(rq); -+ ttwu_do_activate(rq, p, wake_flags); -+ rq_unlock(rq); -+} -+ -+/*** -+ * try_to_wake_up - wake up a thread -+ * @p: the thread to be awakened -+ * @state: the mask of task states that can be woken -+ * @wake_flags: wake modifier flags (WF_*) -+ * -+ * Put it on the run-queue if it's not already there. 
The "current" -+ * thread is always on the run-queue (except when the actual -+ * re-schedule is in progress), and as such you're allowed to do -+ * the simpler "current->state = TASK_RUNNING" to mark yourself -+ * runnable without the overhead of this. -+ * -+ * Return: %true if @p was woken up, %false if it was already running. -+ * or @state didn't match @p's state. -+ */ -+static int -+try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) -+{ -+ unsigned long flags; -+ int cpu, success = 0; -+ -+ preempt_disable(); -+ if (p == current) { -+ /* -+ * We're waking current, this means 'p->on_rq' and 'task_cpu(p) -+ * == smp_processor_id()'. Together this means we can special -+ * case the whole 'p->on_rq && ttwu_remote()' case below -+ * without taking any locks. -+ * -+ * In particular: -+ * - we rely on Program-Order guarantees for all the ordering, -+ * - we're serialized against set_special_state() by virtue of -+ * it disabling IRQs (this allows not taking ->pi_lock). -+ */ -+ if (!(p->state & state)) -+ goto out; -+ -+ success = 1; -+ cpu = task_cpu(p); -+ trace_sched_waking(p); -+ p->state = TASK_RUNNING; -+ trace_sched_wakeup(p); -+ goto out; -+ } -+ -+ /* -+ * If we are going to wake up a thread waiting for CONDITION we -+ * need to ensure that CONDITION=1 done by the caller can not be -+ * reordered with p->state check below. This pairs with mb() in -+ * set_current_state() the waiting thread does. -+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ smp_mb__after_spinlock(); -+ if (!(p->state & state)) -+ goto unlock; -+ -+ trace_sched_waking(p); -+ -+ /* We're going to change ->state: */ -+ success = 1; -+ cpu = task_cpu(p); -+ -+ /* -+ * Ensure we load p->on_rq _after_ p->state, otherwise it would -+ * be possible to, falsely, observe p->on_rq == 0 and get stuck -+ * in smp_cond_load_acquire() below. -+ * -+ * sched_ttwu_pending() try_to_wake_up() -+ * STORE p->on_rq = 1 LOAD p->state -+ * UNLOCK rq->lock -+ * -+ * __schedule() (switch to task 'p') -+ * LOCK rq->lock smp_rmb(); -+ * smp_mb__after_spinlock(); -+ * UNLOCK rq->lock -+ * -+ * [task p] -+ * STORE p->state = UNINTERRUPTIBLE LOAD p->on_rq -+ * -+ * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in -+ * __schedule(). See the comment for smp_mb__after_spinlock(). -+ */ -+ smp_rmb(); -+ if (p->on_rq && ttwu_remote(p, wake_flags)) -+ goto unlock; -+ -+#ifdef CONFIG_SMP -+ /* -+ * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be -+ * possible to, falsely, observe p->on_cpu == 0. -+ * -+ * One must be running (->on_cpu == 1) in order to remove oneself -+ * from the runqueue. -+ * -+ * __schedule() (switch to task 'p') try_to_wake_up() -+ * STORE p->on_cpu = 1 LOAD p->on_rq -+ * UNLOCK rq->lock -+ * -+ * __schedule() (put 'p' to sleep) -+ * LOCK rq->lock smp_rmb(); -+ * smp_mb__after_spinlock(); -+ * STORE p->on_rq = 0 LOAD p->on_cpu -+ * -+ * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in -+ * __schedule(). See the comment for smp_mb__after_spinlock(). -+ */ -+ smp_rmb(); -+ -+ /* -+ * If the owning (remote) CPU is still in the middle of schedule() with -+ * this task as prev, wait until its done referencing the task. -+ * -+ * Pairs with the smp_store_release() in finish_task(). -+ * -+ * This ensures that tasks getting woken will be fully ordered against -+ * their previous state and preserve Program Order. 
-+ */ -+ smp_cond_load_acquire(&p->on_cpu, !VAL); -+ -+ p->sched_contributes_to_load = !!task_contributes_to_load(p); -+ p->state = TASK_WAKING; -+ -+ if (p->in_iowait) { -+ delayacct_blkio_end(p); -+ atomic_dec(&task_rq(p)->nr_iowait); -+ } -+ -+ cpu = select_best_cpu(p); -+ if (task_cpu(p) != cpu) { -+ wake_flags |= WF_MIGRATED; -+ psi_ttwu_dequeue(p); -+ set_task_cpu(p, cpu); -+ } -+ -+#else /* CONFIG_SMP */ -+ -+ if (p->in_iowait) { -+ delayacct_blkio_end(p); -+ atomic_dec(&task_rq(p)->nr_iowait); -+ } -+ -+#endif /* CONFIG_SMP */ -+ -+ ttwu_queue(p, cpu, wake_flags); -+unlock: -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+out: -+ if (success) -+ ttwu_stat(p, cpu, wake_flags); -+ preempt_enable(); -+ -+ return success; -+} -+ -+/** -+ * wake_up_process - Wake up a specific process -+ * @p: The process to be woken up. -+ * -+ * Attempt to wake up the nominated process and move it to the set of runnable -+ * processes. -+ * -+ * Return: 1 if the process was woken up, 0 if it was already running. -+ * -+ * This function executes a full memory barrier before accessing the task state. -+ */ -+int wake_up_process(struct task_struct *p) -+{ -+ return try_to_wake_up(p, TASK_NORMAL, 0); -+} -+EXPORT_SYMBOL(wake_up_process); -+ -+int wake_up_state(struct task_struct *p, unsigned int state) -+{ -+ return try_to_wake_up(p, state, 0); -+} -+ -+static void time_slice_expired(struct task_struct *p, struct rq *rq); -+ -+/* -+ * Perform scheduler related setup for a newly forked process p. -+ * p is forked by current. -+ */ -+int sched_fork(unsigned long __maybe_unused clone_flags, struct task_struct *p) -+{ -+ unsigned long flags; -+ -+#ifdef CONFIG_PREEMPT_NOTIFIERS -+ INIT_HLIST_HEAD(&p->preempt_notifiers); -+#endif -+ -+#ifdef CONFIG_COMPACTION -+ p->capture_control = NULL; -+#endif -+ -+ /* -+ * We mark the process as NEW here. This guarantees that -+ * nobody will actually run it, and a signal or other external -+ * event cannot wake it up and insert it on the runqueue either. -+ */ -+ p->state = TASK_NEW; -+ -+ /* -+ * The process state is set to the same value of the process executing -+ * do_fork() code. That is running. This guarantees that nobody will -+ * actually run it, and a signal or other external event cannot wake -+ * it up and insert it on the runqueue either. -+ */ -+ -+ /* Should be reset in fork.c but done here for ease of MuQSS patching */ -+ p->on_cpu = -+ p->on_rq = -+ p->utime = -+ p->stime = -+ p->sched_time = -+ p->stime_ns = -+ p->utime_ns = 0; -+ skiplist_node_init(&p->node); -+ -+ /* -+ * Revert to default priority/policy on fork if requested. -+ */ -+ if (unlikely(p->sched_reset_on_fork)) { -+ if (p->policy == SCHED_FIFO || p->policy == SCHED_RR) { -+ p->policy = SCHED_NORMAL; -+ p->normal_prio = normal_prio(p); -+ } -+ -+ if (PRIO_TO_NICE(p->static_prio) < 0) { -+ p->static_prio = NICE_TO_PRIO(0); -+ p->normal_prio = p->static_prio; -+ } -+ -+ /* -+ * We don't need the reset flag anymore after the fork. It has -+ * fulfilled its duty: -+ */ -+ p->sched_reset_on_fork = 0; -+ } -+ -+ /* -+ * Silence PROVE_RCU. 
-+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ set_task_cpu(p, smp_processor_id()); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ -+#ifdef CONFIG_SCHED_INFO -+ if (unlikely(sched_info_on())) -+ memset(&p->sched_info, 0, sizeof(p->sched_info)); -+#endif -+ init_task_preempt_count(p); -+ -+ return 0; -+} -+ -+#ifdef CONFIG_SCHEDSTATS -+ -+DEFINE_STATIC_KEY_FALSE(sched_schedstats); -+static bool __initdata __sched_schedstats = false; -+ -+static void set_schedstats(bool enabled) -+{ -+ if (enabled) -+ static_branch_enable(&sched_schedstats); -+ else -+ static_branch_disable(&sched_schedstats); -+} -+ -+void force_schedstat_enabled(void) -+{ -+ if (!schedstat_enabled()) { -+ pr_info("kernel profiling enabled schedstats, disable via kernel.sched_schedstats.\n"); -+ static_branch_enable(&sched_schedstats); -+ } -+} -+ -+static int __init setup_schedstats(char *str) -+{ -+ int ret = 0; -+ if (!str) -+ goto out; -+ -+ /* -+ * This code is called before jump labels have been set up, so we can't -+ * change the static branch directly just yet. Instead set a temporary -+ * variable so init_schedstats() can do it later. -+ */ -+ if (!strcmp(str, "enable")) { -+ __sched_schedstats = true; -+ ret = 1; -+ } else if (!strcmp(str, "disable")) { -+ __sched_schedstats = false; -+ ret = 1; -+ } -+out: -+ if (!ret) -+ pr_warn("Unable to parse schedstats=\n"); -+ -+ return ret; -+} -+__setup("schedstats=", setup_schedstats); -+ -+static void __init init_schedstats(void) -+{ -+ set_schedstats(__sched_schedstats); -+} -+ -+#ifdef CONFIG_PROC_SYSCTL -+int sysctl_schedstats(struct ctl_table *table, int write, -+ void __user *buffer, size_t *lenp, loff_t *ppos) -+{ -+ struct ctl_table t; -+ int err; -+ int state = static_branch_likely(&sched_schedstats); -+ -+ if (write && !capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ -+ t = *table; -+ t.data = &state; -+ err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); -+ if (err < 0) -+ return err; -+ if (write) -+ set_schedstats(state); -+ return err; -+} -+#endif /* CONFIG_PROC_SYSCTL */ -+#else /* !CONFIG_SCHEDSTATS */ -+static inline void init_schedstats(void) {} -+#endif /* CONFIG_SCHEDSTATS */ -+ -+static void update_cpu_clock_switch(struct rq *rq, struct task_struct *p); -+ -+static void account_task_cpu(struct rq *rq, struct task_struct *p) -+{ -+ update_clocks(rq); -+ /* This isn't really a context switch but accounting is the same */ -+ update_cpu_clock_switch(rq, p); -+ p->last_ran = rq->niffies; -+} -+ -+bool sched_smp_initialized __read_mostly; -+ -+static inline int hrexpiry_enabled(struct rq *rq) -+{ -+ if (unlikely(!cpu_active(cpu_of(rq)) || !sched_smp_initialized)) -+ return 0; -+ return hrtimer_is_hres_active(&rq->hrexpiry_timer); -+} -+ -+/* -+ * Use HR-timers to deliver accurate preemption points. -+ */ -+static inline void hrexpiry_clear(struct rq *rq) -+{ -+ if (!hrexpiry_enabled(rq)) -+ return; -+ if (hrtimer_active(&rq->hrexpiry_timer)) -+ hrtimer_cancel(&rq->hrexpiry_timer); -+} -+ -+/* -+ * High-resolution time_slice expiry. -+ * Runs from hardirq context with interrupts disabled. -+ */ -+static enum hrtimer_restart hrexpiry(struct hrtimer *timer) -+{ -+ struct rq *rq = container_of(timer, struct rq, hrexpiry_timer); -+ struct task_struct *p; -+ -+ /* This can happen during CPU hotplug / resume */ -+ if (unlikely(cpu_of(rq) != smp_processor_id())) -+ goto out; -+ -+ /* -+ * We're doing this without the runqueue lock but this should always -+ * be run on the local CPU. 
Time slice should run out in __schedule -+ * but we set it to zero here in case niffies is slightly less. -+ */ -+ p = rq->curr; -+ p->time_slice = 0; -+ __set_tsk_resched(p); -+out: -+ return HRTIMER_NORESTART; -+} -+ -+/* -+ * Called to set the hrexpiry timer state. -+ * -+ * called with irqs disabled from the local CPU only -+ */ -+static void hrexpiry_start(struct rq *rq, u64 delay) -+{ -+ if (!hrexpiry_enabled(rq)) -+ return; -+ -+ hrtimer_start(&rq->hrexpiry_timer, ns_to_ktime(delay), -+ HRTIMER_MODE_REL_PINNED); -+} -+ -+static void init_rq_hrexpiry(struct rq *rq) -+{ -+ hrtimer_init(&rq->hrexpiry_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); -+ rq->hrexpiry_timer.function = hrexpiry; -+} -+ -+static inline int rq_dither(struct rq *rq) -+{ -+ if (!hrexpiry_enabled(rq)) -+ return HALF_JIFFY_US; -+ return 0; -+} -+ -+/* -+ * wake_up_new_task - wake up a newly created task for the first time. -+ * -+ * This function will do some initial scheduler statistics housekeeping -+ * that must be done for every newly created context, then puts the task -+ * on the runqueue and wakes it. -+ */ -+void wake_up_new_task(struct task_struct *p) -+{ -+ struct task_struct *parent, *rq_curr; -+ struct rq *rq, *new_rq; -+ unsigned long flags; -+ -+ parent = p->parent; -+ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ p->state = TASK_RUNNING; -+ /* Task_rq can't change yet on a new task */ -+ new_rq = rq = task_rq(p); -+ if (unlikely(needs_other_cpu(p, task_cpu(p)))) { -+ set_task_cpu(p, valid_task_cpu(p)); -+ new_rq = task_rq(p); -+ } -+ -+ double_rq_lock(rq, new_rq); -+ rq_curr = rq->curr; -+ -+ /* -+ * Make sure we do not leak PI boosting priority to the child. -+ */ -+ p->prio = rq_curr->normal_prio; -+ -+ trace_sched_wakeup_new(p); -+ -+ /* -+ * Share the timeslice between parent and child, thus the -+ * total amount of pending timeslices in the system doesn't change, -+ * resulting in more scheduling fairness. If it's negative, it won't -+ * matter since that's the same as being 0. rq->rq_deadline is only -+ * modified within schedule() so it is always equal to -+ * current->deadline. -+ */ -+ account_task_cpu(rq, rq_curr); -+ p->last_ran = rq_curr->last_ran; -+ if (likely(rq_curr->policy != SCHED_FIFO)) { -+ rq_curr->time_slice /= 2; -+ if (rq_curr->time_slice < RESCHED_US) { -+ /* -+ * Forking task has run out of timeslice. Reschedule it and -+ * start its child with a new time slice and deadline. The -+ * child will end up running first because its deadline will -+ * be slightly earlier. -+ */ -+ __set_tsk_resched(rq_curr); -+ time_slice_expired(p, new_rq); -+ if (suitable_idle_cpus(p)) -+ resched_best_idle(p, task_cpu(p)); -+ else if (unlikely(rq != new_rq)) -+ try_preempt(p, new_rq); -+ } else { -+ p->time_slice = rq_curr->time_slice; -+ if (rq_curr == parent && rq == new_rq && !suitable_idle_cpus(p)) { -+ /* -+ * The VM isn't cloned, so we're in a good position to -+ * do child-runs-first in anticipation of an exec. This -+ * usually avoids a lot of COW overhead. -+ */ -+ __set_tsk_resched(rq_curr); -+ } else { -+ /* -+ * Adjust the hrexpiry since rq_curr will keep -+ * running and its timeslice has been shortened. 
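The arithmetic in wake_up_new_task() above simply splits whatever remains of the parent's slice: half stays with the parent and half seeds the child, unless the halved remainder drops below RESCHED_US, in which case the parent is rescheduled and the child starts on a fresh slice. A trivial standalone walk-through (the RESCHED_US value is assumed for the demo):

```c
#include <stdio.h>

#define RESCHED_US	100	/* assumed threshold in microseconds */

int main(void)
{
	int parent_slice = 6000;	/* usecs left when fork() happens */
	int child_slice;

	parent_slice /= 2;		/* parent keeps half ... */
	if (parent_slice < RESCHED_US) {
		printf("parent rescheduled, child gets a full new slice\n");
	} else {
		child_slice = parent_slice;	/* ... child inherits the other half */
		printf("parent=%dus child=%dus\n", parent_slice, child_slice);
	}
	return 0;
}
```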
-+ */ -+ hrexpiry_start(rq, US_TO_NS(rq_curr->time_slice)); -+ try_preempt(p, new_rq); -+ } -+ } -+ } else { -+ time_slice_expired(p, new_rq); -+ try_preempt(p, new_rq); -+ } -+ activate_task(new_rq, p, 0); -+ double_rq_unlock(rq, new_rq); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+} -+ -+#ifdef CONFIG_PREEMPT_NOTIFIERS -+ -+static DEFINE_STATIC_KEY_FALSE(preempt_notifier_key); -+ -+void preempt_notifier_inc(void) -+{ -+ static_branch_inc(&preempt_notifier_key); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_inc); -+ -+void preempt_notifier_dec(void) -+{ -+ static_branch_dec(&preempt_notifier_key); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_dec); -+ -+/** -+ * preempt_notifier_register - tell me when current is being preempted & rescheduled -+ * @notifier: notifier struct to register -+ */ -+void preempt_notifier_register(struct preempt_notifier *notifier) -+{ -+ if (!static_branch_unlikely(&preempt_notifier_key)) -+ WARN(1, "registering preempt_notifier while notifiers disabled\n"); -+ -+ hlist_add_head(¬ifier->link, ¤t->preempt_notifiers); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_register); -+ -+/** -+ * preempt_notifier_unregister - no longer interested in preemption notifications -+ * @notifier: notifier struct to unregister -+ * -+ * This is *not* safe to call from within a preemption notifier. -+ */ -+void preempt_notifier_unregister(struct preempt_notifier *notifier) -+{ -+ hlist_del(¬ifier->link); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_unregister); -+ -+static void __fire_sched_in_preempt_notifiers(struct task_struct *curr) -+{ -+ struct preempt_notifier *notifier; -+ -+ hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) -+ notifier->ops->sched_in(notifier, raw_smp_processor_id()); -+} -+ -+static __always_inline void fire_sched_in_preempt_notifiers(struct task_struct *curr) -+{ -+ if (static_branch_unlikely(&preempt_notifier_key)) -+ __fire_sched_in_preempt_notifiers(curr); -+} -+ -+static void -+__fire_sched_out_preempt_notifiers(struct task_struct *curr, -+ struct task_struct *next) -+{ -+ struct preempt_notifier *notifier; -+ -+ hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) -+ notifier->ops->sched_out(notifier, next); -+} -+ -+static __always_inline void -+fire_sched_out_preempt_notifiers(struct task_struct *curr, -+ struct task_struct *next) -+{ -+ if (static_branch_unlikely(&preempt_notifier_key)) -+ __fire_sched_out_preempt_notifiers(curr, next); -+} -+ -+#else /* !CONFIG_PREEMPT_NOTIFIERS */ -+ -+static inline void fire_sched_in_preempt_notifiers(struct task_struct *curr) -+{ -+} -+ -+static inline void -+fire_sched_out_preempt_notifiers(struct task_struct *curr, -+ struct task_struct *next) -+{ -+} -+ -+#endif /* CONFIG_PREEMPT_NOTIFIERS */ -+ -+static inline void prepare_task(struct task_struct *next) -+{ -+ /* -+ * Claim the task as running, we do this before switching to it -+ * such that any running task will have this set. -+ */ -+ next->on_cpu = 1; -+} -+ -+static inline void finish_task(struct task_struct *prev) -+{ -+#ifdef CONFIG_SMP -+ /* -+ * After ->on_cpu is cleared, the task can be moved to a different CPU. -+ * We must ensure this doesn't happen until the switch is completely -+ * finished. -+ * -+ * In particular, the load of prev->state in finish_task_switch() must -+ * happen before this. -+ * -+ * Pairs with the smp_cond_load_acquire() in try_to_wake_up(). 
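The pairing described above (the release store of ->on_cpu just below versus the acquire loop in try_to_wake_up()) is a plain publish/consume handshake: everything the descheduling CPU wrote must be visible before the waker proceeds. A userspace model with C11 atomics and pthreads (compile with -pthread); the kernel primitives are only being imitated here:

```c
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int on_cpu = 1;
static int scratch;			/* ordinary data covered by the handshake */

static void *deschedule(void *arg)
{
	scratch = 42;			/* prev's final writes ... */
	atomic_store_explicit(&on_cpu, 0, memory_order_release);	/* finish_task() */
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, deschedule, NULL);
	/* smp_cond_load_acquire(&p->on_cpu, !VAL) */
	while (atomic_load_explicit(&on_cpu, memory_order_acquire))
		;
	printf("scratch=%d\n", scratch);	/* ... guaranteed to read 42 */
	pthread_join(t, NULL);
	return 0;
}
```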
-+ */ -+ smp_store_release(&prev->on_cpu, 0); -+#endif -+} -+ -+static inline void -+prepare_lock_switch(struct rq *rq, struct task_struct *next) -+{ -+ /* -+ * Since the runqueue lock will be released by the next -+ * task (which is an invalid locking op but in the case -+ * of the scheduler it's an obvious special-case), so we -+ * do an early lockdep release here: -+ */ -+ spin_release(&rq->lock->dep_map, 1, _THIS_IP_); -+#ifdef CONFIG_DEBUG_SPINLOCK -+ /* this is a valid case when another task releases the spinlock */ -+ rq->lock->owner = next; -+#endif -+} -+ -+static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) -+{ -+ /* -+ * If we are tracking spinlock dependencies then we have to -+ * fix up the runqueue lock - which gets 'carried over' from -+ * prev into current: -+ */ -+ spin_acquire(&rq->lock->dep_map, 0, 0, _THIS_IP_); -+ -+#ifdef CONFIG_SMP -+ /* -+ * If prev was marked as migrating to another CPU in return_task, drop -+ * the local runqueue lock but leave interrupts disabled and grab the -+ * remote lock we're migrating it to before enabling them. -+ */ -+ if (unlikely(task_on_rq_migrating(prev))) { -+ sched_info_dequeued(rq, prev); -+ /* -+ * We move the ownership of prev to the new cpu now. ttwu can't -+ * activate prev to the wrong cpu since it has to grab this -+ * runqueue in ttwu_remote. -+ */ -+#ifdef CONFIG_THREAD_INFO_IN_TASK -+ prev->cpu = prev->wake_cpu; -+#else -+ task_thread_info(prev)->cpu = prev->wake_cpu; -+#endif -+ raw_spin_unlock(rq->lock); -+ -+ raw_spin_lock(&prev->pi_lock); -+ rq = __task_rq_lock(prev, NULL); -+ /* Check that someone else hasn't already queued prev */ -+ if (likely(!task_queued(prev))) { -+ enqueue_task(rq, prev, 0); -+ prev->on_rq = TASK_ON_RQ_QUEUED; -+ /* Wake up the CPU if it's not already running */ -+ resched_if_idle(rq); -+ } -+ raw_spin_unlock(&prev->pi_lock); -+ } -+#endif -+ rq_unlock(rq); -+ -+ do_pending_softirq(rq, current); -+ -+ local_irq_enable(); -+} -+ -+#ifndef prepare_arch_switch -+# define prepare_arch_switch(next) do { } while (0) -+#endif -+#ifndef finish_arch_switch -+# define finish_arch_switch(prev) do { } while (0) -+#endif -+#ifndef finish_arch_post_lock_switch -+# define finish_arch_post_lock_switch() do { } while (0) -+#endif -+ -+/** -+ * prepare_task_switch - prepare to switch tasks -+ * @rq: the runqueue preparing to switch -+ * @next: the task we are going to switch to. -+ * -+ * This is called with the rq lock held and interrupts off. It must -+ * be paired with a subsequent finish_task_switch after the context -+ * switch. -+ * -+ * prepare_task_switch sets up locking and calls architecture specific -+ * hooks. -+ */ -+static inline void -+prepare_task_switch(struct rq *rq, struct task_struct *prev, -+ struct task_struct *next) -+{ -+ kcov_prepare_switch(prev); -+ sched_info_switch(rq, prev, next); -+ perf_event_task_sched_out(prev, next); -+ rseq_preempt(prev); -+ fire_sched_out_preempt_notifiers(prev, next); -+ prepare_task(next); -+ prepare_arch_switch(next); -+} -+ -+/** -+ * finish_task_switch - clean up after a task-switch -+ * @rq: runqueue associated with task-switch -+ * @prev: the thread we just switched away from. -+ * -+ * finish_task_switch must be called after the context switch, paired -+ * with a prepare_task_switch call before the context switch. -+ * finish_task_switch will reconcile locking set up by prepare_task_switch, -+ * and do any other architecture-specific cleanup actions. -+ * -+ * Note that we may have delayed dropping an mm in context_switch(). 
If -+ * so, we finish that here outside of the runqueue lock. (Doing it -+ * with the lock held can cause deadlocks; see schedule() for -+ * details.) -+ * -+ * The context switch have flipped the stack from under us and restored the -+ * local variables which were saved when this task called schedule() in the -+ * past. prev == current is still correct but we need to recalculate this_rq -+ * because prev may have moved to another CPU. -+ */ -+static void finish_task_switch(struct task_struct *prev) -+ __releases(rq->lock) -+{ -+ struct rq *rq = this_rq(); -+ struct mm_struct *mm = rq->prev_mm; -+ long prev_state; -+ -+ /* -+ * The previous task will have left us with a preempt_count of 2 -+ * because it left us after: -+ * -+ * schedule() -+ * preempt_disable(); // 1 -+ * __schedule() -+ * raw_spin_lock_irq(rq->lock) // 2 -+ * -+ * Also, see FORK_PREEMPT_COUNT. -+ */ -+ if (WARN_ONCE(preempt_count() != 2*PREEMPT_DISABLE_OFFSET, -+ "corrupted preempt_count: %s/%d/0x%x\n", -+ current->comm, current->pid, preempt_count())) -+ preempt_count_set(FORK_PREEMPT_COUNT); -+ -+ rq->prev_mm = NULL; -+ -+ /* -+ * A task struct has one reference for the use as "current". -+ * If a task dies, then it sets TASK_DEAD in tsk->state and calls -+ * schedule one last time. The schedule call will never return, and -+ * the scheduled task must drop that reference. -+ * -+ * We must observe prev->state before clearing prev->on_cpu (in -+ * finish_task), otherwise a concurrent wakeup can get prev -+ * running on another CPU and we could rave with its RUNNING -> DEAD -+ * transition, resulting in a double drop. -+ */ -+ prev_state = prev->state; -+ vtime_task_switch(prev); -+ perf_event_task_sched_in(prev, current); -+ finish_task(prev); -+ finish_lock_switch(rq, prev); -+ finish_arch_post_lock_switch(); -+ kcov_finish_switch(current); -+ -+ fire_sched_in_preempt_notifiers(current); -+ /* -+ * When switching through a kernel thread, the loop in -+ * membarrier_{private,global}_expedited() may have observed that -+ * kernel thread and not issued an IPI. It is therefore possible to -+ * schedule between user->kernel->user threads without passing though -+ * switch_mm(). Membarrier requires a barrier after storing to -+ * rq->curr, before returning to userspace, so provide them here: -+ * -+ * - a full memory barrier for {PRIVATE,GLOBAL}_EXPEDITED, implicitly -+ * provided by mmdrop(), -+ * - a sync_core for SYNC_CORE. -+ */ -+ if (mm) { -+ membarrier_mm_sync_core_before_usermode(mm); -+ mmdrop(mm); -+ } -+ if (unlikely(prev_state == TASK_DEAD)) { -+ /* -+ * Remove function-return probe instances associated with this -+ * task and put them back on the free list. -+ */ -+ kprobe_flush_task(prev); -+ -+ /* Task is done with its stack. */ -+ put_task_stack(prev); -+ -+ put_task_struct_rcu_user(prev); -+ } -+} -+ -+/** -+ * schedule_tail - first thing a freshly forked thread must call. -+ * @prev: the thread we just switched away from. -+ */ -+asmlinkage __visible void schedule_tail(struct task_struct *prev) -+{ -+ /* -+ * New tasks start with FORK_PREEMPT_COUNT, see there and -+ * finish_task_switch() for details. -+ * -+ * finish_task_switch() will drop rq->lock() and lower preempt_count -+ * and the preempt_enable() will end up enabling preemption (on -+ * PREEMPT_COUNT kernels). 
-+ */ -+ -+ finish_task_switch(prev); -+ preempt_enable(); -+ -+ if (current->set_child_tid) -+ put_user(task_pid_vnr(current), current->set_child_tid); -+ -+ calculate_sigpending(); -+} -+ -+/* -+ * context_switch - switch to the new MM and the new thread's register state. -+ */ -+static __always_inline void -+context_switch(struct rq *rq, struct task_struct *prev, -+ struct task_struct *next) -+{ -+ prepare_task_switch(rq, prev, next); -+ -+ /* -+ * For paravirt, this is coupled with an exit in switch_to to -+ * combine the page table reload and the switch backend into -+ * one hypercall. -+ */ -+ arch_start_context_switch(prev); -+ -+ /* -+ * kernel -> kernel lazy + transfer active -+ * user -> kernel lazy + mmgrab() active -+ * -+ * kernel -> user switch + mmdrop() active -+ * user -> user switch -+ */ -+ if (!next->mm) { // to kernel -+ enter_lazy_tlb(prev->active_mm, next); -+ -+ next->active_mm = prev->active_mm; -+ if (prev->mm) // from user -+ mmgrab(prev->active_mm); -+ else -+ prev->active_mm = NULL; -+ } else { // to user -+ membarrier_switch_mm(rq, prev->active_mm, next->mm); -+ /* -+ * sys_membarrier() requires an smp_mb() between setting -+ * rq->curr / membarrier_switch_mm() and returning to userspace. -+ * -+ * The below provides this either through switch_mm(), or in -+ * case 'prev->active_mm == next->mm' through -+ * finish_task_switch()'s mmdrop(). -+ */ -+ switch_mm_irqs_off(prev->active_mm, next->mm, next); -+ -+ if (!prev->mm) { // from kernel -+ /* will mmdrop() in finish_task_switch(). */ -+ rq->prev_mm = prev->active_mm; -+ prev->active_mm = NULL; -+ } -+ } -+ prepare_lock_switch(rq, next); -+ -+ /* Here we just switch the register state and the stack. */ -+ switch_to(prev, next, prev); -+ barrier(); -+ -+ finish_task_switch(prev); -+} -+ -+/* -+ * nr_running, nr_uninterruptible and nr_context_switches: -+ * -+ * externally visible scheduler statistics: current number of runnable -+ * threads, total number of context switches performed since bootup. -+ */ -+unsigned long nr_running(void) -+{ -+ unsigned long i, sum = 0; -+ -+ for_each_online_cpu(i) -+ sum += cpu_rq(i)->nr_running; -+ -+ return sum; -+} -+ -+static unsigned long nr_uninterruptible(void) -+{ -+ unsigned long i, sum = 0; -+ -+ for_each_online_cpu(i) -+ sum += cpu_rq(i)->nr_uninterruptible; -+ -+ return sum; -+} -+ -+/* -+ * Check if only the current task is running on the CPU. -+ * -+ * Caution: this function does not check that the caller has disabled -+ * preemption, thus the result might have a time-of-check-to-time-of-use -+ * race. The caller is responsible to use it correctly, for example: -+ * -+ * - from a non-preemptible section (of course) -+ * -+ * - from a thread that is bound to a single CPU -+ * -+ * - in a loop with very short iterations (e.g. a polling loop) -+ */ -+bool single_task_running(void) -+{ -+ if (rq_load(raw_rq()) == 1) -+ return true; -+ else -+ return false; -+} -+EXPORT_SYMBOL(single_task_running); -+ -+unsigned long long nr_context_switches(void) -+{ -+ int cpu; -+ unsigned long long sum = 0; -+ -+ for_each_possible_cpu(cpu) -+ sum += cpu_rq(cpu)->nr_switches; -+ -+ return sum; -+} -+ -+/* -+ * Consumers of these two interfaces, like for example the cpufreq menu -+ * governor are using nonsensical data. Boosting frequency for a CPU that has -+ * IO-wait which might not even end up running the task when it does become -+ * runnable. 
-+ */ -+ -+unsigned long nr_iowait_cpu(int cpu) -+{ -+ return atomic_read(&cpu_rq(cpu)->nr_iowait); -+} -+ -+/* -+ * IO-wait accounting, and how its mostly bollocks (on SMP). -+ * -+ * The idea behind IO-wait account is to account the idle time that we could -+ * have spend running if it were not for IO. That is, if we were to improve the -+ * storage performance, we'd have a proportional reduction in IO-wait time. -+ * -+ * This all works nicely on UP, where, when a task blocks on IO, we account -+ * idle time as IO-wait, because if the storage were faster, it could've been -+ * running and we'd not be idle. -+ * -+ * This has been extended to SMP, by doing the same for each CPU. This however -+ * is broken. -+ * -+ * Imagine for instance the case where two tasks block on one CPU, only the one -+ * CPU will have IO-wait accounted, while the other has regular idle. Even -+ * though, if the storage were faster, both could've ran at the same time, -+ * utilising both CPUs. -+ * -+ * This means, that when looking globally, the current IO-wait accounting on -+ * SMP is a lower bound, by reason of under accounting. -+ * -+ * Worse, since the numbers are provided per CPU, they are sometimes -+ * interpreted per CPU, and that is nonsensical. A blocked task isn't strictly -+ * associated with any one particular CPU, it can wake to another CPU than it -+ * blocked on. This means the per CPU IO-wait number is meaningless. -+ * -+ * Task CPU affinities can make all that even more 'interesting'. -+ */ -+ -+unsigned long nr_iowait(void) -+{ -+ unsigned long cpu, sum = 0; -+ -+ for_each_possible_cpu(cpu) -+ sum += nr_iowait_cpu(cpu); -+ -+ return sum; -+} -+ -+unsigned long nr_active(void) -+{ -+ return nr_running() + nr_uninterruptible(); -+} -+ -+/* Variables and functions for calc_load */ -+static unsigned long calc_load_update; -+unsigned long avenrun[3]; -+EXPORT_SYMBOL(avenrun); -+ -+/** -+ * get_avenrun - get the load average array -+ * @loads: pointer to dest load array -+ * @offset: offset to add -+ * @shift: shift count to shift the result left -+ * -+ * These values are estimates at best, so no need for locking. -+ */ -+void get_avenrun(unsigned long *loads, unsigned long offset, int shift) -+{ -+ loads[0] = (avenrun[0] + offset) << shift; -+ loads[1] = (avenrun[1] + offset) << shift; -+ loads[2] = (avenrun[2] + offset) << shift; -+} -+ -+/* -+ * calc_load - update the avenrun load estimates every LOAD_FREQ seconds. -+ */ -+void calc_global_load(unsigned long ticks) -+{ -+ long active; -+ -+ if (time_before(jiffies, READ_ONCE(calc_load_update))) -+ return; -+ active = nr_active() * FIXED_1; -+ -+ avenrun[0] = calc_load(avenrun[0], EXP_1, active); -+ avenrun[1] = calc_load(avenrun[1], EXP_5, active); -+ avenrun[2] = calc_load(avenrun[2], EXP_15, active); -+ -+ calc_load_update = jiffies + LOAD_FREQ; -+} -+ -+/** -+ * fixed_power_int - compute: x^n, in O(log n) time -+ * -+ * @x: base of the power -+ * @frac_bits: fractional bits of @x -+ * @n: power to raise @x to. -+ * -+ * By exploiting the relation between the definition of the natural power -+ * function: x^n := x*x*...*x (x multiplied by itself for n times), and -+ * the binary encoding of numbers used by computers: n := \Sum n_i * 2^i, -+ * (where: n_i \elem {0, 1}, the binary vector representing n), -+ * we find: x^n := x^(\Sum n_i * 2^i) := \Prod x^(n_i * 2^i), which is -+ * of course trivially computable in O(log_2 n), the length of our binary -+ * vector. 
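calc_global_load() above folds the number of active tasks into avenrun[] as a fixed-point exponential moving average. A standalone rerun of a few such updates, using what are believed to be the mainline FSHIFT/EXP_1 constants and calc_load() helper (treat those as assumptions of this sketch rather than part of the patch):

```c
#include <stdio.h>

#define FSHIFT	11
#define FIXED_1	(1 << FSHIFT)
#define EXP_1	1884			/* ~1/exp(5s/1min) in fixed point */

static unsigned long calc_load(unsigned long load, unsigned long exp,
			       unsigned long active)
{
	unsigned long newload = load * exp + active * (FIXED_1 - exp);

	if (active >= load)
		newload += FIXED_1 - 1;	/* round up while rising */
	return newload / FIXED_1;
}

int main(void)
{
	unsigned long avenrun0 = 0;		/* 1-minute average, fixed point */
	unsigned long active = 3 * FIXED_1;	/* 3 runnable tasks */

	for (int step = 1; step <= 3; step++) {
		avenrun0 = calc_load(avenrun0, EXP_1, active);
		printf("after %2ds: %lu.%02lu\n", step * 5,
		       avenrun0 >> FSHIFT,
		       (avenrun0 & (FIXED_1 - 1)) * 100 / FIXED_1);
	}
	return 0;
}
```

With three runnable tasks the 1-minute figure only creeps toward 3.00 one LOAD_FREQ interval at a time, which is why load averages react slowly to sudden load changes.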
-+ */ -+static unsigned long -+fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n) -+{ -+ unsigned long result = 1UL << frac_bits; -+ -+ if (n) { -+ for (;;) { -+ if (n & 1) { -+ result *= x; -+ result += 1UL << (frac_bits - 1); -+ result >>= frac_bits; -+ } -+ n >>= 1; -+ if (!n) -+ break; -+ x *= x; -+ x += 1UL << (frac_bits - 1); -+ x >>= frac_bits; -+ } -+ } -+ -+ return result; -+} -+ -+/* -+ * a1 = a0 * e + a * (1 - e) -+ * -+ * a2 = a1 * e + a * (1 - e) -+ * = (a0 * e + a * (1 - e)) * e + a * (1 - e) -+ * = a0 * e^2 + a * (1 - e) * (1 + e) -+ * -+ * a3 = a2 * e + a * (1 - e) -+ * = (a0 * e^2 + a * (1 - e) * (1 + e)) * e + a * (1 - e) -+ * = a0 * e^3 + a * (1 - e) * (1 + e + e^2) -+ * -+ * ... -+ * -+ * an = a0 * e^n + a * (1 - e) * (1 + e + ... + e^n-1) [1] -+ * = a0 * e^n + a * (1 - e) * (1 - e^n)/(1 - e) -+ * = a0 * e^n + a * (1 - e^n) -+ * -+ * [1] application of the geometric series: -+ * -+ * n 1 - x^(n+1) -+ * S_n := \Sum x^i = ------------- -+ * i=0 1 - x -+ */ -+unsigned long -+calc_load_n(unsigned long load, unsigned long exp, -+ unsigned long active, unsigned int n) -+{ -+ return calc_load(load, fixed_power_int(exp, FSHIFT, n), active); -+} -+ -+DEFINE_PER_CPU(struct kernel_stat, kstat); -+DEFINE_PER_CPU(struct kernel_cpustat, kernel_cpustat); -+ -+EXPORT_PER_CPU_SYMBOL(kstat); -+EXPORT_PER_CPU_SYMBOL(kernel_cpustat); -+ -+#ifdef CONFIG_PARAVIRT -+static inline u64 steal_ticks(u64 steal) -+{ -+ if (unlikely(steal > NSEC_PER_SEC)) -+ return div_u64(steal, TICK_NSEC); -+ -+ return __iter_div_u64_rem(steal, TICK_NSEC, &steal); -+} -+#endif -+ -+#ifndef nsecs_to_cputime -+# define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs) -+#endif -+ -+/* -+ * On each tick, add the number of nanoseconds to the unbanked variables and -+ * once one tick's worth has accumulated, account it allowing for accurate -+ * sub-tick accounting and totals. Use the TICK_APPROX_NS to match the way we -+ * deduct nanoseconds. 
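The geometric-series identity above is what lets calc_load_n() replace n per-interval updates with a single step using e^n. A standalone check that the two routes agree to within fixed-point rounding, reusing the fixed_power_int() routine from above together with the assumed mainline calc_load()/EXP_1 values:

```c
#include <stdio.h>

#define FSHIFT	11
#define FIXED_1	(1 << FSHIFT)
#define EXP_1	1884

static unsigned long calc_load(unsigned long load, unsigned long exp,
			       unsigned long active)
{
	unsigned long newload = load * exp + active * (FIXED_1 - exp);

	if (active >= load)
		newload += FIXED_1 - 1;
	return newload / FIXED_1;
}

/* Same O(log n) fixed-point power as fixed_power_int() above */
static unsigned long fixed_power_int(unsigned long x, unsigned int frac_bits,
				     unsigned int n)
{
	unsigned long result = 1UL << frac_bits;

	while (n) {
		if (n & 1) {
			result *= x;
			result += 1UL << (frac_bits - 1);
			result >>= frac_bits;
		}
		n >>= 1;
		if (!n)
			break;
		x *= x;
		x += 1UL << (frac_bits - 1);
		x >>= frac_bits;
	}
	return result;
}

int main(void)
{
	unsigned long active = 2 * FIXED_1, iterated = 0;
	unsigned int n = 12;		/* 12 missed 5-second updates */

	for (unsigned int i = 0; i < n; i++)
		iterated = calc_load(iterated, EXP_1, active);

	/* calc_load_n(): one update with e^n instead of n updates with e;
	 * the two results differ only by fixed-point rounding. */
	printf("iterated=%lu  closed-form=%lu\n", iterated,
	       calc_load(0, fixed_power_int(EXP_1, FSHIFT, n), active));
	return 0;
}
```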
-+ */ -+static void pc_idle_time(struct rq *rq, struct task_struct *idle, unsigned long ns) -+{ -+ u64 *cpustat = kcpustat_this_cpu->cpustat; -+ unsigned long ticks; -+ -+ if (atomic_read(&rq->nr_iowait) > 0) { -+ rq->iowait_ns += ns; -+ if (rq->iowait_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(rq->iowait_ns); -+ cpustat[CPUTIME_IOWAIT] += (__force u64)TICK_APPROX_NS * ticks; -+ rq->iowait_ns %= JIFFY_NS; -+ } -+ } else { -+ rq->idle_ns += ns; -+ if (rq->idle_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(rq->idle_ns); -+ cpustat[CPUTIME_IDLE] += (__force u64)TICK_APPROX_NS * ticks; -+ rq->idle_ns %= JIFFY_NS; -+ } -+ } -+ acct_update_integrals(idle); -+} -+ -+static void pc_system_time(struct rq *rq, struct task_struct *p, -+ int hardirq_offset, unsigned long ns) -+{ -+ u64 *cpustat = kcpustat_this_cpu->cpustat; -+ unsigned long ticks; -+ -+ p->stime_ns += ns; -+ if (p->stime_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(p->stime_ns); -+ p->stime_ns %= JIFFY_NS; -+ p->stime += (__force u64)TICK_APPROX_NS * ticks; -+ account_group_system_time(p, TICK_APPROX_NS * ticks); -+ } -+ p->sched_time += ns; -+ account_group_exec_runtime(p, ns); -+ -+ if (hardirq_count() - hardirq_offset) { -+ rq->irq_ns += ns; -+ if (rq->irq_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(rq->irq_ns); -+ cpustat[CPUTIME_IRQ] += (__force u64)TICK_APPROX_NS * ticks; -+ rq->irq_ns %= JIFFY_NS; -+ } -+ } else if (in_serving_softirq()) { -+ rq->softirq_ns += ns; -+ if (rq->softirq_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(rq->softirq_ns); -+ cpustat[CPUTIME_SOFTIRQ] += (__force u64)TICK_APPROX_NS * ticks; -+ rq->softirq_ns %= JIFFY_NS; -+ } -+ } else { -+ rq->system_ns += ns; -+ if (rq->system_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(rq->system_ns); -+ cpustat[CPUTIME_SYSTEM] += (__force u64)TICK_APPROX_NS * ticks; -+ rq->system_ns %= JIFFY_NS; -+ } -+ } -+ acct_update_integrals(p); -+} -+ -+static void pc_user_time(struct rq *rq, struct task_struct *p, unsigned long ns) -+{ -+ u64 *cpustat = kcpustat_this_cpu->cpustat; -+ unsigned long ticks; -+ -+ p->utime_ns += ns; -+ if (p->utime_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(p->utime_ns); -+ p->utime_ns %= JIFFY_NS; -+ p->utime += (__force u64)TICK_APPROX_NS * ticks; -+ account_group_user_time(p, TICK_APPROX_NS * ticks); -+ } -+ p->sched_time += ns; -+ account_group_exec_runtime(p, ns); -+ -+ if (this_cpu_ksoftirqd() == p) { -+ /* -+ * ksoftirqd time do not get accounted in cpu_softirq_time. -+ * So, we have to handle it separately here. -+ */ -+ rq->softirq_ns += ns; -+ if (rq->softirq_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(rq->softirq_ns); -+ cpustat[CPUTIME_SOFTIRQ] += (__force u64)TICK_APPROX_NS * ticks; -+ rq->softirq_ns %= JIFFY_NS; -+ } -+ } -+ -+ if (task_nice(p) > 0 || idleprio_task(p)) { -+ rq->nice_ns += ns; -+ if (rq->nice_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(rq->nice_ns); -+ cpustat[CPUTIME_NICE] += (__force u64)TICK_APPROX_NS * ticks; -+ rq->nice_ns %= JIFFY_NS; -+ } -+ } else { -+ rq->user_ns += ns; -+ if (rq->user_ns >= JIFFY_NS) { -+ ticks = NS_TO_JIFFIES(rq->user_ns); -+ cpustat[CPUTIME_USER] += (__force u64)TICK_APPROX_NS * ticks; -+ rq->user_ns %= JIFFY_NS; -+ } -+ } -+ acct_update_integrals(p); -+} -+ -+/* -+ * This is called on clock ticks. -+ * Bank in p->sched_time the ns elapsed since the last tick or switch. -+ * CPU scheduler quota accounting is also performed here in microseconds. 
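The accounting helpers above all bank raw nanoseconds per category and only convert whole jiffies' worth into cputime, carrying the remainder forward so nothing is lost between ticks. The pattern in isolation (HZ assumed to be 100 for the demo; the patch uses JIFFY_NS and TICK_APPROX_NS):

```c
#include <stdio.h>

#define HZ		100
#define JIFFY_NS	(1000000000ULL / HZ)

int main(void)
{
	unsigned long long banked_ns = 0, ticks_accounted = 0;
	unsigned long long deltas_ns[] = { 3000000, 9000000, 2500000 };

	for (unsigned int i = 0; i < 3; i++) {
		banked_ns += deltas_ns[i];
		if (banked_ns >= JIFFY_NS) {
			ticks_accounted += banked_ns / JIFFY_NS;
			banked_ns %= JIFFY_NS;	/* keep the change */
		}
	}
	/* 14.5ms banked -> 1 whole tick accounted, 4.5ms carried forward */
	printf("ticks=%llu remainder=%lluns\n", ticks_accounted, banked_ns);
	return 0;
}
```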
-+ */ -+static void update_cpu_clock_tick(struct rq *rq, struct task_struct *p) -+{ -+ s64 account_ns = rq->niffies - p->last_ran; -+ struct task_struct *idle = rq->idle; -+ -+ /* Accurate tick timekeeping */ -+ if (user_mode(get_irq_regs())) -+ pc_user_time(rq, p, account_ns); -+ else if (p != idle || (irq_count() != HARDIRQ_OFFSET)) { -+ pc_system_time(rq, p, HARDIRQ_OFFSET, account_ns); -+ } else -+ pc_idle_time(rq, idle, account_ns); -+ -+ /* time_slice accounting is done in usecs to avoid overflow on 32bit */ -+ if (p->policy != SCHED_FIFO && p != idle) -+ p->time_slice -= NS_TO_US(account_ns); -+ -+ p->last_ran = rq->niffies; -+} -+ -+/* -+ * This is called on context switches. -+ * Bank in p->sched_time the ns elapsed since the last tick or switch. -+ * CPU scheduler quota accounting is also performed here in microseconds. -+ */ -+static void update_cpu_clock_switch(struct rq *rq, struct task_struct *p) -+{ -+ s64 account_ns = rq->niffies - p->last_ran; -+ struct task_struct *idle = rq->idle; -+ -+ /* Accurate subtick timekeeping */ -+ if (p != idle) -+ pc_user_time(rq, p, account_ns); -+ else -+ pc_idle_time(rq, idle, account_ns); -+ -+ /* time_slice accounting is done in usecs to avoid overflow on 32bit */ -+ if (p->policy != SCHED_FIFO && p != idle) -+ p->time_slice -= NS_TO_US(account_ns); -+} -+ -+/* -+ * Return any ns on the sched_clock that have not yet been accounted in -+ * @p in case that task is currently running. -+ * -+ * Called with task_rq_lock(p) held. -+ */ -+static inline u64 do_task_delta_exec(struct task_struct *p, struct rq *rq) -+{ -+ u64 ns = 0; -+ -+ /* -+ * Must be ->curr _and_ ->on_rq. If dequeued, we would -+ * project cycles that may never be accounted to this -+ * thread, breaking clock_gettime(). -+ */ -+ if (p == rq->curr && task_on_rq_queued(p)) { -+ update_clocks(rq); -+ ns = rq->niffies - p->last_ran; -+ } -+ -+ return ns; -+} -+ -+/* -+ * Return accounted runtime for the task. -+ * Return separately the current's pending runtime that have not been -+ * accounted yet. -+ * -+ */ -+unsigned long long task_sched_runtime(struct task_struct *p) -+{ -+ struct rq_flags rf; -+ struct rq *rq; -+ u64 ns; -+ -+#if defined(CONFIG_64BIT) && defined(CONFIG_SMP) -+ /* -+ * 64-bit doesn't need locks to atomically read a 64-bit value. -+ * So we have a optimisation chance when the task's delta_exec is 0. -+ * Reading ->on_cpu is racy, but this is ok. -+ * -+ * If we race with it leaving CPU, we'll take a lock. So we're correct. -+ * If we race with it entering CPU, unaccounted time is 0. This is -+ * indistinguishable from the read occurring a few cycles earlier. -+ * If we see ->on_cpu without ->on_rq, the task is leaving, and has -+ * been accounted, so we're correct here as well. -+ */ -+ if (!p->on_cpu || !task_on_rq_queued(p)) -+ return tsk_seruntime(p); -+#endif -+ -+ rq = task_rq_lock(p, &rf); -+ ns = p->sched_time + do_task_delta_exec(p, rq); -+ task_rq_unlock(rq, p, &rf); -+ -+ return ns; -+} -+ -+/* -+ * Functions to test for when SCHED_ISO tasks have used their allocated -+ * quota as real time scheduling and convert them back to SCHED_NORMAL. All -+ * data is modified only by the local runqueue during scheduler_tick with -+ * interrupts disabled. -+ */ -+ -+/* -+ * Test if SCHED_ISO tasks have run longer than their alloted period as RT -+ * tasks and set the refractory flag if necessary. There is 10% hysteresis -+ * for unsetting the flag. 115/128 is ~90/100 as a fast shift instead of a -+ * slow division. 
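The "115/128 is ~90/100" shortcut above trades a division for a multiply-and-shift when applying the 10% hysteresis. A quick standalone comparison of the two forms:

```c
#include <stdio.h>

int main(void)
{
	for (int iso_cpu = 10; iso_cpu <= 100; iso_cpu += 30)
		printf("limit=%3d%%  115/128 -> %3d   exact 90%% -> %3d\n",
		       iso_cpu, (iso_cpu * 115) >> 7, iso_cpu * 90 / 100);
	return 0;
}
```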
-+ */ -+static inline void iso_tick(struct rq *rq) -+{ -+ rq->iso_ticks = rq->iso_ticks * (ISO_PERIOD - 1) / ISO_PERIOD; -+ rq->iso_ticks += 100; -+ if (rq->iso_ticks > ISO_PERIOD * sched_iso_cpu) { -+ rq->iso_refractory = true; -+ if (unlikely(rq->iso_ticks > ISO_PERIOD * 100)) -+ rq->iso_ticks = ISO_PERIOD * 100; -+ } -+} -+ -+/* No SCHED_ISO task was running so decrease rq->iso_ticks */ -+static inline void no_iso_tick(struct rq *rq, int ticks) -+{ -+ if (rq->iso_ticks > 0 || rq->iso_refractory) { -+ rq->iso_ticks = rq->iso_ticks * (ISO_PERIOD - ticks) / ISO_PERIOD; -+ if (rq->iso_ticks < ISO_PERIOD * (sched_iso_cpu * 115 / 128)) { -+ rq->iso_refractory = false; -+ if (unlikely(rq->iso_ticks < 0)) -+ rq->iso_ticks = 0; -+ } -+ } -+} -+ -+/* This manages tasks that have run out of timeslice during a scheduler_tick */ -+static void task_running_tick(struct rq *rq) -+{ -+ struct task_struct *p = rq->curr; -+ -+ /* -+ * If a SCHED_ISO task is running we increment the iso_ticks. In -+ * order to prevent SCHED_ISO tasks from causing starvation in the -+ * presence of true RT tasks we account those as iso_ticks as well. -+ */ -+ if (rt_task(p) || task_running_iso(p)) -+ iso_tick(rq); -+ else -+ no_iso_tick(rq, 1); -+ -+ /* SCHED_FIFO tasks never run out of timeslice. */ -+ if (p->policy == SCHED_FIFO) -+ return; -+ -+ if (iso_task(p)) { -+ if (task_running_iso(p)) { -+ if (rq->iso_refractory) { -+ /* -+ * SCHED_ISO task is running as RT and limit -+ * has been hit. Force it to reschedule as -+ * SCHED_NORMAL by zeroing its time_slice -+ */ -+ p->time_slice = 0; -+ } -+ } else if (!rq->iso_refractory) { -+ /* Can now run again ISO. Reschedule to pick up prio */ -+ goto out_resched; -+ } -+ } -+ -+ /* -+ * Tasks that were scheduled in the first half of a tick are not -+ * allowed to run into the 2nd half of the next tick if they will -+ * run out of time slice in the interim. Otherwise, if they have -+ * less than RESCHED_US μs of time slice left they will be rescheduled. -+ * Dither is used as a backup for when hrexpiry is disabled or high res -+ * timers not configured in. -+ */ -+ if (p->time_slice - rq->dither >= RESCHED_US) -+ return; -+out_resched: -+ rq_lock(rq); -+ __set_tsk_resched(p); -+ rq_unlock(rq); -+} -+ -+static inline void task_tick(struct rq *rq) -+{ -+ if (!rq_idle(rq)) -+ task_running_tick(rq); -+ else if (rq->last_jiffy > rq->last_scheduler_tick) -+ no_iso_tick(rq, rq->last_jiffy - rq->last_scheduler_tick); -+} -+ -+#ifdef CONFIG_NO_HZ_FULL -+/* -+ * We can stop the timer tick any time highres timers are active since -+ * we rely entirely on highres timeouts for task expiry rescheduling. -+ */ -+static void sched_stop_tick(struct rq *rq, int cpu) -+{ -+ if (!hrexpiry_enabled(rq)) -+ return; -+ if (!tick_nohz_full_enabled()) -+ return; -+ if (!tick_nohz_full_cpu(cpu)) -+ return; -+ tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED); -+} -+ -+static inline void sched_start_tick(struct rq *rq, int cpu) -+{ -+ tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED); -+} -+ -+struct tick_work { -+ int cpu; -+ atomic_t state; -+ struct delayed_work work; -+}; -+/* Values for ->state, see diagram below. 
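Illustration (not part of the patch): iso_tick()/no_iso_tick() above implement an exponentially decaying budget with hysteresis — the refractory flag is set once iso_ticks exceeds ISO_PERIOD * sched_iso_cpu and cleared again only below roughly 90% of that (the 115/128 shift). A self-contained sketch with assumed values ISO_PERIOD=500 and sched_iso_cpu=70, purely to show when the flag flips:

#include <stdio.h>

#define ISO_PERIOD 500            /* assumed illustrative value */
static int  sched_iso_cpu = 70;   /* assumed iso CPU limit in percent */

static long iso_ticks;
static int  iso_refractory;

static void iso_tick_sketch(void)      /* an ISO task ran this tick */
{
    iso_ticks = iso_ticks * (ISO_PERIOD - 1) / ISO_PERIOD + 100;
    if (iso_ticks > (long)ISO_PERIOD * sched_iso_cpu)
        iso_refractory = 1;
}

static void no_iso_tick_sketch(void)   /* no ISO task ran this tick */
{
    iso_ticks = iso_ticks * (ISO_PERIOD - 1) / ISO_PERIOD;
    /* 115/128 is roughly 90%, giving ~10% hysteresis before re-enabling */
    if (iso_ticks < (long)ISO_PERIOD * (sched_iso_cpu * 115 / 128))
        iso_refractory = 0;
}

int main(void)
{
    int t;
    for (t = 0; t < 2000 && !iso_refractory; t++)
        iso_tick_sketch();
    printf("refractory after %d busy ticks (iso_ticks=%ld)\n", t, iso_ticks);
    while (iso_refractory) {
        no_iso_tick_sketch();
        t++;
    }
    printf("cleared again at tick %d (iso_ticks=%ld)\n", t, iso_ticks);
    return 0;
}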
*/ -+#define TICK_SCHED_REMOTE_OFFLINE 0 -+#define TICK_SCHED_REMOTE_OFFLINING 1 -+#define TICK_SCHED_REMOTE_RUNNING 2 -+ -+/* -+ * State diagram for ->state: -+ * -+ * -+ * TICK_SCHED_REMOTE_OFFLINE -+ * | ^ -+ * | | -+ * | | sched_tick_remote() -+ * | | -+ * | | -+ * +--TICK_SCHED_REMOTE_OFFLINING -+ * | ^ -+ * | | -+ * sched_tick_start() | | sched_tick_stop() -+ * | | -+ * V | -+ * TICK_SCHED_REMOTE_RUNNING -+ * -+ * -+ * Other transitions get WARN_ON_ONCE(), except that sched_tick_remote() -+ * and sched_tick_start() are happy to leave the state in RUNNING. -+ */ -+ -+static struct tick_work __percpu *tick_work_cpu; -+ -+static void sched_tick_remote(struct work_struct *work) -+{ -+ struct delayed_work *dwork = to_delayed_work(work); -+ struct tick_work *twork = container_of(dwork, struct tick_work, work); -+ int cpu = twork->cpu; -+ struct rq *rq = cpu_rq(cpu); -+ struct task_struct *curr; -+ u64 delta; -+ int os; -+ -+ /* -+ * Handle the tick only if it appears the remote CPU is running in full -+ * dynticks mode. The check is racy by nature, but missing a tick or -+ * having one too much is no big deal because the scheduler tick updates -+ * statistics and checks timeslices in a time-independent way, regardless -+ * of when exactly it is running. -+ */ -+ if (idle_cpu(cpu) || !tick_nohz_tick_stopped_cpu(cpu)) -+ goto out_requeue; -+ -+ rq_lock_irq(rq); -+ curr = rq->curr; -+ if (is_idle_task(curr) || cpu_is_offline(cpu)) -+ goto out_unlock; -+ -+ update_rq_clock(rq); -+ delta = rq_clock_task(rq) - curr->last_ran; -+ -+ /* -+ * Make sure the next tick runs within a reasonable -+ * amount of time. -+ */ -+ WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3); -+ task_tick(rq); -+ -+out_unlock: -+ rq_unlock_irq(rq, NULL); -+ -+out_requeue: -+ /* -+ * Run the remote tick once per second (1Hz). This arbitrary -+ * frequency is large enough to avoid overload but short enough -+ * to keep scheduler internal stats reasonably up to date. But -+ * first update state to reflect hotplug activity if required. -+ */ -+ os = atomic_fetch_add_unless(&twork->state, -1, TICK_SCHED_REMOTE_RUNNING); -+ WARN_ON_ONCE(os == TICK_SCHED_REMOTE_OFFLINE); -+ if (os == TICK_SCHED_REMOTE_RUNNING) -+ queue_delayed_work(system_unbound_wq, dwork, HZ); -+} -+ -+static void sched_tick_start(int cpu) -+{ -+ struct tick_work *twork; -+ int os; -+ -+ if (housekeeping_cpu(cpu, HK_FLAG_TICK)) -+ return; -+ -+ WARN_ON_ONCE(!tick_work_cpu); -+ -+ twork = per_cpu_ptr(tick_work_cpu, cpu); -+ os = atomic_xchg(&twork->state, TICK_SCHED_REMOTE_RUNNING); -+ WARN_ON_ONCE(os == TICK_SCHED_REMOTE_RUNNING); -+ if (os == TICK_SCHED_REMOTE_OFFLINE) { -+ twork->cpu = cpu; -+ INIT_DELAYED_WORK(&twork->work, sched_tick_remote); -+ queue_delayed_work(system_unbound_wq, &twork->work, HZ); -+ } -+} -+ -+#ifdef CONFIG_HOTPLUG_CPU -+static void sched_tick_stop(int cpu) -+{ -+ struct tick_work *twork; -+ int os; -+ -+ if (housekeeping_cpu(cpu, HK_FLAG_TICK)) -+ return; -+ -+ WARN_ON_ONCE(!tick_work_cpu); -+ -+ twork = per_cpu_ptr(tick_work_cpu, cpu); -+ /* There cannot be competing actions, but don't rely on stop-machine. */ -+ os = atomic_xchg(&twork->state, TICK_SCHED_REMOTE_OFFLINING); -+ WARN_ON_ONCE(os != TICK_SCHED_REMOTE_RUNNING); -+ /* Don't cancel, as this would mess up the state machine. 
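Illustration (not part of the patch): the OFFLINE/OFFLINING/RUNNING state machine used by sched_tick_remote() relies on atomic_xchg() for start/stop and atomic_fetch_add_unless(state, -1, RUNNING) so that a stopping CPU's last remote tick steps OFFLINING down to OFFLINE instead of requeueing. A rough userspace analogue using C11 atomics (fetch_add_unless re-implemented here, since it is a kernel helper):

#include <stdatomic.h>
#include <stdio.h>

#define REMOTE_OFFLINE   0
#define REMOTE_OFFLINING 1
#define REMOTE_RUNNING   2

static atomic_int state = REMOTE_OFFLINE;

/* Userspace analogue of atomic_fetch_add_unless(v, a, u):
 * add @a unless the current value is @u; return the old value. */
static int fetch_add_unless(atomic_int *v, int a, int u)
{
    int old = atomic_load(v);
    do {
        if (old == u)
            break;
    } while (!atomic_compare_exchange_weak(v, &old, old + a));
    return old;
}

static void tick_start(void) { atomic_exchange(&state, REMOTE_RUNNING); }
static void tick_stop(void)  { atomic_exchange(&state, REMOTE_OFFLINING); }

/* One remote-tick pass: requeue only if still RUNNING, otherwise step
 * OFFLINING down to OFFLINE and stop. */
static void remote_tick(void)
{
    int os = fetch_add_unless(&state, -1, REMOTE_RUNNING);
    if (os == REMOTE_RUNNING)
        printf("still running: requeue the delayed work\n");
    else
        printf("was %d: dropped to %d, no requeue\n", os, atomic_load(&state));
}

int main(void)
{
    tick_start();
    remote_tick();   /* RUNNING: would requeue   */
    tick_stop();
    remote_tick();   /* OFFLINING -> OFFLINE     */
    return 0;
}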
*/ -+} -+#endif /* CONFIG_HOTPLUG_CPU */ -+ -+int __init sched_tick_offload_init(void) -+{ -+ tick_work_cpu = alloc_percpu(struct tick_work); -+ BUG_ON(!tick_work_cpu); -+ return 0; -+} -+ -+#else /* !CONFIG_NO_HZ_FULL */ -+static inline void sched_stop_tick(struct rq *rq, int cpu) {} -+static inline void sched_start_tick(struct rq *rq, int cpu) {} -+static inline void sched_tick_start(int cpu) { } -+static inline void sched_tick_stop(int cpu) { } -+#endif -+ -+/* -+ * This function gets called by the timer code, with HZ frequency. -+ * We call it with interrupts disabled. -+ */ -+void scheduler_tick(void) -+{ -+ int cpu __maybe_unused = smp_processor_id(); -+ struct rq *rq = cpu_rq(cpu); -+ -+ sched_clock_tick(); -+ update_clocks(rq); -+ update_load_avg(rq, 0); -+ update_cpu_clock_tick(rq, rq->curr); -+ task_tick(rq); -+ rq->last_scheduler_tick = rq->last_jiffy; -+ rq->last_tick = rq->clock; -+ psi_task_tick(rq); -+ perf_event_task_tick(); -+ sched_stop_tick(rq, cpu); -+} -+ -+#if defined(CONFIG_PREEMPTION) && (defined(CONFIG_DEBUG_PREEMPT) || \ -+ defined(CONFIG_TRACE_PREEMPT_TOGGLE)) -+/* -+ * If the value passed in is equal to the current preempt count -+ * then we just disabled preemption. Start timing the latency. -+ */ -+static inline void preempt_latency_start(int val) -+{ -+ if (preempt_count() == val) { -+ unsigned long ip = get_lock_parent_ip(); -+#ifdef CONFIG_DEBUG_PREEMPT -+ current->preempt_disable_ip = ip; -+#endif -+ trace_preempt_off(CALLER_ADDR0, ip); -+ } -+} -+ -+void preempt_count_add(int val) -+{ -+#ifdef CONFIG_DEBUG_PREEMPT -+ /* -+ * Underflow? -+ */ -+ if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0))) -+ return; -+#endif -+ __preempt_count_add(val); -+#ifdef CONFIG_DEBUG_PREEMPT -+ /* -+ * Spinlock count overflowing soon? -+ */ -+ DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= -+ PREEMPT_MASK - 10); -+#endif -+ preempt_latency_start(val); -+} -+EXPORT_SYMBOL(preempt_count_add); -+NOKPROBE_SYMBOL(preempt_count_add); -+ -+/* -+ * If the value passed in equals to the current preempt count -+ * then we just enabled preemption. Stop timing the latency. -+ */ -+static inline void preempt_latency_stop(int val) -+{ -+ if (preempt_count() == val) -+ trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip()); -+} -+ -+void preempt_count_sub(int val) -+{ -+#ifdef CONFIG_DEBUG_PREEMPT -+ /* -+ * Underflow? -+ */ -+ if (DEBUG_LOCKS_WARN_ON(val > preempt_count())) -+ return; -+ /* -+ * Is the spinlock portion underflowing? -+ */ -+ if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) && -+ !(preempt_count() & PREEMPT_MASK))) -+ return; -+#endif -+ -+ preempt_latency_stop(val); -+ __preempt_count_sub(val); -+} -+EXPORT_SYMBOL(preempt_count_sub); -+NOKPROBE_SYMBOL(preempt_count_sub); -+ -+#else -+static inline void preempt_latency_start(int val) { } -+static inline void preempt_latency_stop(int val) { } -+#endif -+ -+static inline unsigned long get_preempt_disable_ip(struct task_struct *p) -+{ -+#ifdef CONFIG_DEBUG_PREEMPT -+ return p->preempt_disable_ip; -+#else -+ return 0; -+#endif -+} -+ -+/* -+ * The time_slice is only refilled when it is empty and that is when we set a -+ * new deadline. Make sure update_clocks has been called recently to update -+ * rq->niffies. 
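Illustration (not part of the patch): preempt_latency_start()/preempt_latency_stop() above only time the outermost disable/enable pair — the check preempt_count() == val is true exactly when the count is leaving or returning to zero. A small userspace sketch of that nesting rule, with clock_gettime() standing in for the kernel's tracing hooks:

#include <stdio.h>
#include <time.h>

static int preempt_count;
static struct timespec disabled_at;

static void latency_start(int val)
{
    /* only the transition that actually disabled "preemption" starts the timer */
    if (preempt_count == val)
        clock_gettime(CLOCK_MONOTONIC, &disabled_at);
}

static void latency_stop(int val)
{
    if (preempt_count == val) {
        struct timespec now;
        clock_gettime(CLOCK_MONOTONIC, &now);
        long ns = (now.tv_sec - disabled_at.tv_sec) * 1000000000L +
                  (now.tv_nsec - disabled_at.tv_nsec);
        printf("preemption was off for %ld ns\n", ns);
    }
}

static void count_add(int val) { preempt_count += val; latency_start(val); }
static void count_sub(int val) { latency_stop(val); preempt_count -= val; }

int main(void)
{
    count_add(1);   /* outermost disable: starts timing            */
    count_add(1);   /* nested disable: no effect on the timer      */
    count_sub(1);   /* nested enable: still off                    */
    count_sub(1);   /* outermost enable: reports the latency       */
    return 0;
}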
-+ */ -+static void time_slice_expired(struct task_struct *p, struct rq *rq) -+{ -+ p->time_slice = timeslice(); -+ p->deadline = rq->niffies + task_deadline_diff(p); -+#ifdef CONFIG_SMT_NICE -+ if (!p->mm) -+ p->smt_bias = 0; -+ else if (rt_task(p)) -+ p->smt_bias = 1 << 30; -+ else if (task_running_iso(p)) -+ p->smt_bias = 1 << 29; -+ else if (idleprio_task(p)) { -+ if (task_running_idle(p)) -+ p->smt_bias = 0; -+ else -+ p->smt_bias = 1; -+ } else if (--p->smt_bias < 1) -+ p->smt_bias = MAX_PRIO - p->static_prio; -+#endif -+} -+ -+/* -+ * Timeslices below RESCHED_US are considered as good as expired as there's no -+ * point rescheduling when there's so little time left. SCHED_BATCH tasks -+ * have been flagged be not latency sensitive and likely to be fully CPU -+ * bound so every time they're rescheduled they have their time_slice -+ * refilled, but get a new later deadline to have little effect on -+ * SCHED_NORMAL tasks. -+ -+ */ -+static inline void check_deadline(struct task_struct *p, struct rq *rq) -+{ -+ if (p->time_slice < RESCHED_US || batch_task(p)) -+ time_slice_expired(p, rq); -+} -+ -+/* -+ * Task selection with skiplists is a simple matter of picking off the first -+ * task in the sorted list, an O(1) operation. The lookup is amortised O(1) -+ * being bound to the number of processors. -+ * -+ * Runqueues are selectively locked based on their unlocked data and then -+ * unlocked if not needed. At most 3 locks will be held at any time and are -+ * released as soon as they're no longer needed. All balancing between CPUs -+ * is thus done here in an extremely simple first come best fit manner. -+ * -+ * This iterates over runqueues in cache locality order. In interactive mode -+ * it iterates over all CPUs and finds the task with the best key/deadline. -+ * In non-interactive mode it will only take a task if it's from the current -+ * runqueue or a runqueue with more tasks than the current one with a better -+ * key/deadline. -+ */ -+#ifdef CONFIG_SMP -+static inline struct task_struct -+*earliest_deadline_task(struct rq *rq, int cpu, struct task_struct *idle) -+{ -+ struct rq *locked = NULL, *chosen = NULL; -+ struct task_struct *edt = idle; -+ int i, best_entries = 0; -+ u64 best_key = ~0ULL; -+ -+ for (i = 0; i < total_runqueues; i++) { -+ struct rq *other_rq = rq_order(rq, i); -+ skiplist_node *next; -+ int entries; -+ -+ entries = other_rq->sl->entries; -+ /* -+ * Check for queued entres lockless first. The local runqueue -+ * is locked so entries will always be accurate. -+ */ -+ if (!sched_interactive) { -+ /* -+ * Don't reschedule balance across nodes unless the CPU -+ * is idle. -+ */ -+ if (edt != idle && rq->cpu_locality[other_rq->cpu] > LOCALITY_SMP) -+ break; -+ if (entries <= best_entries) -+ continue; -+ } else if (!entries) -+ continue; -+ -+ /* if (i) implies other_rq != rq */ -+ if (i) { -+ /* Check for best id queued lockless first */ -+ if (other_rq->best_key >= best_key) -+ continue; -+ -+ if (unlikely(!trylock_rq(rq, other_rq))) -+ continue; -+ -+ /* Need to reevaluate entries after locking */ -+ entries = other_rq->sl->entries; -+ if (unlikely(!entries)) { -+ unlock_rq(other_rq); -+ continue; -+ } -+ } -+ -+ next = other_rq->node; -+ /* -+ * In interactive mode we check beyond the best entry on other -+ * runqueues if we can't get the best for smt or affinity -+ * reasons. 
-+ */ -+ while ((next = next->next[0]) != other_rq->node) { -+ struct task_struct *p; -+ u64 key = next->key; -+ -+ /* Reevaluate key after locking */ -+ if (key >= best_key) -+ break; -+ -+ p = next->value; -+ if (!smt_schedule(p, rq)) { -+ if (i && !sched_interactive) -+ break; -+ continue; -+ } -+ -+ if (sched_other_cpu(p, cpu)) { -+ if (sched_interactive || !i) -+ continue; -+ break; -+ } -+ /* Make sure affinity is ok */ -+ if (i) { -+ /* From this point on p is the best so far */ -+ if (locked) -+ unlock_rq(locked); -+ chosen = locked = other_rq; -+ } -+ best_entries = entries; -+ best_key = key; -+ edt = p; -+ break; -+ } -+ /* rq->preempting is a hint only as the state may have changed -+ * since it was set with the resched call but if we have met -+ * the condition we can break out here. */ -+ if (edt == rq->preempting) -+ break; -+ if (i && other_rq != chosen) -+ unlock_rq(other_rq); -+ } -+ -+ if (likely(edt != idle)) -+ take_task(rq, cpu, edt); -+ -+ if (locked) -+ unlock_rq(locked); -+ -+ rq->preempting = NULL; -+ -+ return edt; -+} -+#else /* CONFIG_SMP */ -+static inline struct task_struct -+*earliest_deadline_task(struct rq *rq, int cpu, struct task_struct *idle) -+{ -+ struct task_struct *edt; -+ -+ if (unlikely(!rq->sl->entries)) -+ return idle; -+ edt = rq->node->next[0]->value; -+ take_task(rq, cpu, edt); -+ return edt; -+} -+#endif /* CONFIG_SMP */ -+ -+/* -+ * Print scheduling while atomic bug: -+ */ -+static noinline void __schedule_bug(struct task_struct *prev) -+{ -+ /* Save this before calling printk(), since that will clobber it */ -+ unsigned long preempt_disable_ip = get_preempt_disable_ip(current); -+ -+ if (oops_in_progress) -+ return; -+ -+ printk(KERN_ERR "BUG: scheduling while atomic: %s/%d/0x%08x\n", -+ prev->comm, prev->pid, preempt_count()); -+ -+ debug_show_held_locks(prev); -+ print_modules(); -+ if (irqs_disabled()) -+ print_irqtrace_events(prev); -+ if (IS_ENABLED(CONFIG_DEBUG_PREEMPT) -+ && in_atomic_preempt_off()) { -+ pr_err("Preemption disabled at:"); -+ print_ip_sym(preempt_disable_ip); -+ pr_cont("\n"); -+ } -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+} -+ -+/* -+ * Various schedule()-time debugging checks and statistics: -+ */ -+static inline void schedule_debug(struct task_struct *prev, bool preempt) -+{ -+#ifdef CONFIG_SCHED_STACK_END_CHECK -+ if (task_stack_end_corrupted(prev)) -+ panic("corrupted stack end detected inside scheduler\n"); -+#endif -+ -+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP -+ if (!preempt && prev->state && prev->non_block_count) { -+ printk(KERN_ERR "BUG: scheduling in a non-blocking section: %s/%d/%i\n", -+ prev->comm, prev->pid, prev->non_block_count); -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+ } -+#endif -+ -+ if (unlikely(in_atomic_preempt_off())) { -+ __schedule_bug(prev); -+ preempt_count_set(PREEMPT_DISABLED); -+ } -+ rcu_sleep_check(); -+ -+ profile_hit(SCHED_PROFILING, __builtin_return_address(0)); -+ -+ schedstat_inc(this_rq()->sched_count); -+} -+ -+/* -+ * The currently running task's information is all stored in rq local data -+ * which is only modified by the local CPU. 
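Illustration (not part of the patch): earliest_deadline_task() ultimately applies a simple rule — of the eligible queued tasks, run the one whose virtual deadline (skiplist key) is smallest; the skiplist keeps candidates sorted so the pick itself is O(1). The sketch below shows only the selection rule with a plain linear scan over made-up tasks, leaving out the per-runqueue locking, SMT and affinity checks:

#include <stdint.h>
#include <stdio.h>

struct fake_task { const char *name; uint64_t deadline; };

/* The patch keeps tasks sorted by key in a skiplist, so the best task is
 * simply the first node; this scan just demonstrates the selection rule. */
static const struct fake_task *earliest(const struct fake_task *t, int n)
{
    const struct fake_task *best = NULL;
    for (int i = 0; i < n; i++)
        if (!best || t[i].deadline < best->deadline)
            best = &t[i];
    return best;
}

int main(void)
{
    struct fake_task rq[] = {
        { "make", 900 }, { "mpv", 300 }, { "cc1", 750 },
    };
    printf("next: %s\n", earliest(rq, 3)->name);  /* mpv: earliest deadline */
    return 0;
}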
-+ */ -+static inline void set_rq_task(struct rq *rq, struct task_struct *p) -+{ -+ if (p == rq->idle || p->policy == SCHED_FIFO) -+ hrexpiry_clear(rq); -+ else -+ hrexpiry_start(rq, US_TO_NS(p->time_slice)); -+ if (rq->clock - rq->last_tick > HALF_JIFFY_NS) -+ rq->dither = 0; -+ else -+ rq->dither = rq_dither(rq); -+ -+ rq->rq_deadline = p->deadline; -+ rq->rq_prio = p->prio; -+#ifdef CONFIG_SMT_NICE -+ rq->rq_mm = p->mm; -+ rq->rq_smt_bias = p->smt_bias; -+#endif -+} -+ -+#ifdef CONFIG_SMT_NICE -+static void check_no_siblings(struct rq __maybe_unused *this_rq) {} -+static void wake_no_siblings(struct rq __maybe_unused *this_rq) {} -+static void (*check_siblings)(struct rq *this_rq) = &check_no_siblings; -+static void (*wake_siblings)(struct rq *this_rq) = &wake_no_siblings; -+ -+/* Iterate over smt siblings when we've scheduled a process on cpu and decide -+ * whether they should continue running or be descheduled. */ -+static void check_smt_siblings(struct rq *this_rq) -+{ -+ int other_cpu; -+ -+ for_each_cpu(other_cpu, &this_rq->thread_mask) { -+ struct task_struct *p; -+ struct rq *rq; -+ -+ rq = cpu_rq(other_cpu); -+ if (rq_idle(rq)) -+ continue; -+ p = rq->curr; -+ if (!smt_schedule(p, this_rq)) -+ resched_curr(rq); -+ } -+} -+ -+static void wake_smt_siblings(struct rq *this_rq) -+{ -+ int other_cpu; -+ -+ for_each_cpu(other_cpu, &this_rq->thread_mask) { -+ struct rq *rq; -+ -+ rq = cpu_rq(other_cpu); -+ if (rq_idle(rq)) -+ resched_idle(rq); -+ } -+} -+#else -+static void check_siblings(struct rq __maybe_unused *this_rq) {} -+static void wake_siblings(struct rq __maybe_unused *this_rq) {} -+#endif -+ -+/* -+ * schedule() is the main scheduler function. -+ * -+ * The main means of driving the scheduler and thus entering this function are: -+ * -+ * 1. Explicit blocking: mutex, semaphore, waitqueue, etc. -+ * -+ * 2. TIF_NEED_RESCHED flag is checked on interrupt and userspace return -+ * paths. For example, see arch/x86/entry_64.S. -+ * -+ * To drive preemption between tasks, the scheduler sets the flag in timer -+ * interrupt handler scheduler_tick(). -+ * -+ * 3. Wakeups don't really cause entry into schedule(). They add a -+ * task to the run-queue and that's it. -+ * -+ * Now, if the new task added to the run-queue preempts the current -+ * task, then the wakeup sets TIF_NEED_RESCHED and schedule() gets -+ * called on the nearest possible occasion: -+ * -+ * - If the kernel is preemptible (CONFIG_PREEMPTION=y): -+ * -+ * - in syscall or exception context, at the next outmost -+ * preempt_enable(). (this might be as soon as the wake_up()'s -+ * spin_unlock()!) -+ * -+ * - in IRQ context, return from interrupt-handler to -+ * preemptible context -+ * -+ * - If the kernel is not preemptible (CONFIG_PREEMPTION is not set) -+ * then at the next: -+ * -+ * - cond_resched() call -+ * - explicit schedule() call -+ * - return from syscall or exception to user-space -+ * - return from interrupt-handler to user-space -+ * -+ * WARNING: must be called with preemption disabled! 
-+ */ -+static void __sched notrace __schedule(bool preempt) -+{ -+ struct task_struct *prev, *next, *idle; -+ unsigned long *switch_count; -+ bool deactivate = false; -+ struct rq *rq; -+ u64 niffies; -+ int cpu; -+ -+ cpu = smp_processor_id(); -+ rq = cpu_rq(cpu); -+ prev = rq->curr; -+ idle = rq->idle; -+ -+ schedule_debug(prev, preempt); -+ -+ local_irq_disable(); -+ rcu_note_context_switch(preempt); -+ -+ /* -+ * Make sure that signal_pending_state()->signal_pending() below -+ * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE) -+ * done by the caller to avoid the race with signal_wake_up(). -+ * -+ * The membarrier system call requires a full memory barrier -+ * after coming from user-space, before storing to rq->curr. -+ */ -+ rq_lock(rq); -+ smp_mb__after_spinlock(); -+#ifdef CONFIG_SMP -+ if (rq->preempt) { -+ /* -+ * Make sure resched_curr hasn't triggered a preemption -+ * locklessly on a task that has since scheduled away. Spurious -+ * wakeup of idle is okay though. -+ */ -+ if (unlikely(preempt && prev != idle && !test_tsk_need_resched(prev))) { -+ rq->preempt = NULL; -+ clear_preempt_need_resched(); -+ rq_unlock_irq(rq, NULL); -+ return; -+ } -+ rq->preempt = NULL; -+ } -+#endif -+ -+ switch_count = &prev->nivcsw; -+ if (!preempt && prev->state) { -+ if (signal_pending_state(prev->state, prev)) { -+ prev->state = TASK_RUNNING; -+ } else { -+ deactivate = true; -+ -+ if (prev->in_iowait) { -+ atomic_inc(&rq->nr_iowait); -+ delayacct_blkio_start(); -+ } -+ } -+ switch_count = &prev->nvcsw; -+ } -+ -+ /* -+ * Store the niffy value here for use by the next task's last_ran -+ * below to avoid losing niffies due to update_clocks being called -+ * again after this point. -+ */ -+ update_clocks(rq); -+ niffies = rq->niffies; -+ update_cpu_clock_switch(rq, prev); -+ -+ clear_tsk_need_resched(prev); -+ clear_preempt_need_resched(); -+ -+ if (idle != prev) { -+ check_deadline(prev, rq); -+ return_task(prev, rq, cpu, deactivate); -+ } -+ -+ next = earliest_deadline_task(rq, cpu, idle); -+ if (likely(next->prio != PRIO_LIMIT)) -+ clear_cpuidle_map(cpu); -+ else { -+ set_cpuidle_map(cpu); -+ update_load_avg(rq, 0); -+ } -+ -+ set_rq_task(rq, next); -+ next->last_ran = niffies; -+ -+ if (likely(prev != next)) { -+ /* -+ * Don't reschedule an idle task or deactivated tasks -+ */ -+ if (prev == idle) { -+ rq->nr_running++; -+ if (rt_task(next)) -+ rq->rt_nr_running++; -+ } else if (!deactivate) -+ resched_suitable_idle(prev); -+ if (unlikely(next == idle)) { -+ rq->nr_running--; -+ if (rt_task(prev)) -+ rq->rt_nr_running--; -+ wake_siblings(rq); -+ } else -+ check_siblings(rq); -+ rq->nr_switches++; -+ /* -+ * RCU users of rcu_dereference(rq->curr) may not see -+ * changes to task_struct made by pick_next_task(). -+ */ -+ RCU_INIT_POINTER(rq->curr, next); -+ /* -+ * The membarrier system call requires each architecture -+ * to have a full memory barrier after updating -+ * rq->curr, before returning to user-space. -+ * -+ * Here are the schemes providing that barrier on the -+ * various architectures: -+ * - mm ? switch_mm() : mmdrop() for x86, s390, sparc, PowerPC. -+ * switch_mm() rely on membarrier_arch_switch_mm() on PowerPC. 
-+ * - finish_lock_switch() for weakly-ordered -+ * architectures where spin_unlock is a full barrier, -+ * - switch_to() for arm64 (weakly-ordered, spin_unlock -+ * is a RELEASE barrier), -+ */ -+ ++*switch_count; -+ -+ trace_sched_switch(preempt, prev, next); -+ context_switch(rq, prev, next); /* unlocks the rq */ -+ } else { -+ check_siblings(rq); -+ rq_unlock(rq); -+ do_pending_softirq(rq, next); -+ local_irq_enable(); -+ } -+} -+ -+void __noreturn do_task_dead(void) -+{ -+ /* Causes final put_task_struct in finish_task_switch(). */ -+ set_special_state(TASK_DEAD); -+ -+ /* Tell freezer to ignore us: */ -+ current->flags |= PF_NOFREEZE; -+ __schedule(false); -+ BUG(); -+ -+ /* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */ -+ for (;;) -+ cpu_relax(); -+} -+ -+static inline void sched_submit_work(struct task_struct *tsk) -+{ -+ if (!tsk->state) -+ return; -+ -+ /* -+ * If a worker went to sleep, notify and ask workqueue whether -+ * it wants to wake up a task to maintain concurrency. -+ * As this function is called inside the schedule() context, -+ * we disable preemption to avoid it calling schedule() again -+ * in the possible wakeup of a kworker. -+ */ -+ if (tsk->flags & PF_WQ_WORKER) { -+ preempt_disable(); -+ wq_worker_sleeping(tsk); -+ preempt_enable_no_resched(); -+ } -+ -+ if (tsk_is_pi_blocked(tsk)) -+ return; -+ -+ /* -+ * If we are going to sleep and we have plugged IO queued, -+ * make sure to submit it to avoid deadlocks. -+ */ -+ if (blk_needs_flush_plug(tsk)) -+ blk_schedule_flush_plug(tsk); -+} -+ -+static inline void sched_update_worker(struct task_struct *tsk) -+{ -+ if (tsk->flags & PF_WQ_WORKER) -+ wq_worker_running(tsk); -+} -+ -+asmlinkage __visible void __sched schedule(void) -+{ -+ struct task_struct *tsk = current; -+ -+ sched_submit_work(tsk); -+ do { -+ preempt_disable(); -+ __schedule(false); -+ sched_preempt_enable_no_resched(); -+ } while (need_resched()); -+ sched_update_worker(tsk); -+} -+ -+EXPORT_SYMBOL(schedule); -+ -+/* -+ * synchronize_rcu_tasks() makes sure that no task is stuck in preempted -+ * state (have scheduled out non-voluntarily) by making sure that all -+ * tasks have either left the run queue or have gone into user space. -+ * As idle tasks do not do either, they must not ever be preempted -+ * (schedule out non-voluntarily). -+ * -+ * schedule_idle() is similar to schedule_preempt_disable() except that it -+ * never enables preemption because it does not call sched_submit_work(). -+ */ -+void __sched schedule_idle(void) -+{ -+ /* -+ * As this skips calling sched_submit_work(), which the idle task does -+ * regardless because that function is a nop when the task is in a -+ * TASK_RUNNING state, make sure this isn't used someplace that the -+ * current task can be in any other state. Note, idle is always in the -+ * TASK_RUNNING state. -+ */ -+ WARN_ON_ONCE(current->state); -+ do { -+ __schedule(false); -+ } while (need_resched()); -+} -+ -+#ifdef CONFIG_CONTEXT_TRACKING -+asmlinkage __visible void __sched schedule_user(void) -+{ -+ /* -+ * If we come here after a random call to set_need_resched(), -+ * or we have been woken up remotely but the IPI has not yet arrived, -+ * we haven't yet exited the RCU idle mode. Do it here manually until -+ * we find a better solution. -+ * -+ * NB: There are buggy callers of this function. Ideally we -+ * should warn if prev_state != IN_USER, but that will trigger -+ * too frequently to make sense yet. 
-+ */ -+ enum ctx_state prev_state = exception_enter(); -+ schedule(); -+ exception_exit(prev_state); -+} -+#endif -+ -+/** -+ * schedule_preempt_disabled - called with preemption disabled -+ * -+ * Returns with preemption disabled. Note: preempt_count must be 1 -+ */ -+void __sched schedule_preempt_disabled(void) -+{ -+ sched_preempt_enable_no_resched(); -+ schedule(); -+ preempt_disable(); -+} -+ -+static void __sched notrace preempt_schedule_common(void) -+{ -+ do { -+ /* -+ * Because the function tracer can trace preempt_count_sub() -+ * and it also uses preempt_enable/disable_notrace(), if -+ * NEED_RESCHED is set, the preempt_enable_notrace() called -+ * by the function tracer will call this function again and -+ * cause infinite recursion. -+ * -+ * Preemption must be disabled here before the function -+ * tracer can trace. Break up preempt_disable() into two -+ * calls. One to disable preemption without fear of being -+ * traced. The other to still record the preemption latency, -+ * which can also be traced by the function tracer. -+ */ -+ preempt_disable_notrace(); -+ preempt_latency_start(1); -+ __schedule(true); -+ preempt_latency_stop(1); -+ preempt_enable_no_resched_notrace(); -+ -+ /* -+ * Check again in case we missed a preemption opportunity -+ * between schedule and now. -+ */ -+ } while (need_resched()); -+} -+ -+#ifdef CONFIG_PREEMPTION -+/* -+ * This is the entry point to schedule() from in-kernel preemption -+ * off of preempt_enable. -+ */ -+asmlinkage __visible void __sched notrace preempt_schedule(void) -+{ -+ /* -+ * If there is a non-zero preempt_count or interrupts are disabled, -+ * we do not want to preempt the current task. Just return.. -+ */ -+ if (likely(!preemptible())) -+ return; -+ -+ preempt_schedule_common(); -+} -+NOKPROBE_SYMBOL(preempt_schedule); -+EXPORT_SYMBOL(preempt_schedule); -+ -+/** -+ * preempt_schedule_notrace - preempt_schedule called by tracing -+ * -+ * The tracing infrastructure uses preempt_enable_notrace to prevent -+ * recursion and tracing preempt enabling caused by the tracing -+ * infrastructure itself. But as tracing can happen in areas coming -+ * from userspace or just about to enter userspace, a preempt enable -+ * can occur before user_exit() is called. This will cause the scheduler -+ * to be called when the system is still in usermode. -+ * -+ * To prevent this, the preempt_enable_notrace will use this function -+ * instead of preempt_schedule() to exit user context if needed before -+ * calling the scheduler. -+ */ -+asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) -+{ -+ enum ctx_state prev_ctx; -+ -+ if (likely(!preemptible())) -+ return; -+ -+ do { -+ /* -+ * Because the function tracer can trace preempt_count_sub() -+ * and it also uses preempt_enable/disable_notrace(), if -+ * NEED_RESCHED is set, the preempt_enable_notrace() called -+ * by the function tracer will call this function again and -+ * cause infinite recursion. -+ * -+ * Preemption must be disabled here before the function -+ * tracer can trace. Break up preempt_disable() into two -+ * calls. One to disable preemption without fear of being -+ * traced. The other to still record the preemption latency, -+ * which can also be traced by the function tracer. -+ */ -+ preempt_disable_notrace(); -+ preempt_latency_start(1); -+ /* -+ * Needs preempt disabled in case user_exit() is traced -+ * and the tracer calls preempt_enable_notrace() causing -+ * an infinite recursion. 
-+ */ -+ prev_ctx = exception_enter(); -+ __schedule(true); -+ exception_exit(prev_ctx); -+ -+ preempt_latency_stop(1); -+ preempt_enable_no_resched_notrace(); -+ } while (need_resched()); -+} -+EXPORT_SYMBOL_GPL(preempt_schedule_notrace); -+ -+#endif /* CONFIG_PREEMPTION */ -+ -+/* -+ * This is the entry point to schedule() from kernel preemption -+ * off of irq context. -+ * Note, that this is called and return with irqs disabled. This will -+ * protect us against recursive calling from irq. -+ */ -+asmlinkage __visible void __sched preempt_schedule_irq(void) -+{ -+ enum ctx_state prev_state; -+ -+ /* Catch callers which need to be fixed */ -+ BUG_ON(preempt_count() || !irqs_disabled()); -+ -+ prev_state = exception_enter(); -+ -+ do { -+ preempt_disable(); -+ local_irq_enable(); -+ __schedule(true); -+ local_irq_disable(); -+ sched_preempt_enable_no_resched(); -+ } while (need_resched()); -+ -+ exception_exit(prev_state); -+} -+ -+int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags, -+ void *key) -+{ -+ return try_to_wake_up(curr->private, mode, wake_flags); -+} -+EXPORT_SYMBOL(default_wake_function); -+ -+#ifdef CONFIG_RT_MUTEXES -+ -+static inline int __rt_effective_prio(struct task_struct *pi_task, int prio) -+{ -+ if (pi_task) -+ prio = min(prio, pi_task->prio); -+ -+ return prio; -+} -+ -+static inline int rt_effective_prio(struct task_struct *p, int prio) -+{ -+ struct task_struct *pi_task = rt_mutex_get_top_task(p); -+ -+ return __rt_effective_prio(pi_task, prio); -+} -+ -+/* -+ * rt_mutex_setprio - set the current priority of a task -+ * @p: task to boost -+ * @pi_task: donor task -+ * -+ * This function changes the 'effective' priority of a task. It does -+ * not touch ->normal_prio like __setscheduler(). -+ * -+ * Used by the rt_mutex code to implement priority inheritance -+ * logic. Call site only calls if the priority of the task changed. -+ */ -+void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) -+{ -+ int prio, oldprio; -+ struct rq *rq; -+ -+ /* XXX used to be waiter->prio, not waiter->task->prio */ -+ prio = __rt_effective_prio(pi_task, p->normal_prio); -+ -+ /* -+ * If nothing changed; bail early. -+ */ -+ if (p->pi_top_task == pi_task && prio == p->prio) -+ return; -+ -+ rq = __task_rq_lock(p, NULL); -+ update_rq_clock(rq); -+ /* -+ * Set under pi_lock && rq->lock, such that the value can be used under -+ * either lock. -+ * -+ * Note that there is loads of tricky to make this pointer cache work -+ * right. rt_mutex_slowunlock()+rt_mutex_postunlock() work together to -+ * ensure a task is de-boosted (pi_task is set to NULL) before the -+ * task is allowed to run again (and can exit). This ensures the pointer -+ * points to a blocked task -- which guaratees the task is present. -+ */ -+ p->pi_top_task = pi_task; -+ -+ /* -+ * For FIFO/RR we only need to set prio, if that matches we're done. -+ */ -+ if (prio == p->prio) -+ goto out_unlock; -+ -+ /* -+ * Idle task boosting is a nono in general. There is one -+ * exception, when PREEMPT_RT and NOHZ is active: -+ * -+ * The idle task calls get_next_timer_interrupt() and holds -+ * the timer wheel base->lock on the CPU and another CPU wants -+ * to access the timer (probably to cancel it). We can safely -+ * ignore the boosting request, as the idle CPU runs this code -+ * with interrupts disabled and will complete the lock -+ * protected section without being interrupted. So there is no -+ * real need to boost. 
-+ */ -+ if (unlikely(p == rq->idle)) { -+ WARN_ON(p != rq->curr); -+ WARN_ON(p->pi_blocked_on); -+ goto out_unlock; -+ } -+ -+ trace_sched_pi_setprio(p, pi_task); -+ oldprio = p->prio; -+ p->prio = prio; -+ if (task_running(rq, p)){ -+ if (prio > oldprio) -+ resched_task(p); -+ } else if (task_queued(p)) { -+ dequeue_task(rq, p, DEQUEUE_SAVE); -+ enqueue_task(rq, p, ENQUEUE_RESTORE); -+ if (prio < oldprio) -+ try_preempt(p, rq); -+ } -+out_unlock: -+ __task_rq_unlock(rq, NULL); -+} -+#else -+static inline int rt_effective_prio(struct task_struct *p, int prio) -+{ -+ return prio; -+} -+#endif -+ -+/* -+ * Adjust the deadline for when the priority is to change, before it's -+ * changed. -+ */ -+static inline void adjust_deadline(struct task_struct *p, int new_prio) -+{ -+ p->deadline += static_deadline_diff(new_prio) - task_deadline_diff(p); -+} -+ -+void set_user_nice(struct task_struct *p, long nice) -+{ -+ int new_static, old_static; -+ struct rq_flags rf; -+ struct rq *rq; -+ -+ if (task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE) -+ return; -+ new_static = NICE_TO_PRIO(nice); -+ /* -+ * We have to be careful, if called from sys_setpriority(), -+ * the task might be in the middle of scheduling on another CPU. -+ */ -+ rq = task_rq_lock(p, &rf); -+ update_rq_clock(rq); -+ -+ /* -+ * The RT priorities are set via sched_setscheduler(), but we still -+ * allow the 'normal' nice value to be set - but as expected -+ * it wont have any effect on scheduling until the task is -+ * not SCHED_NORMAL/SCHED_BATCH: -+ */ -+ if (has_rt_policy(p)) { -+ p->static_prio = new_static; -+ goto out_unlock; -+ } -+ -+ adjust_deadline(p, new_static); -+ old_static = p->static_prio; -+ p->static_prio = new_static; -+ p->prio = effective_prio(p); -+ -+ if (task_queued(p)) { -+ dequeue_task(rq, p, DEQUEUE_SAVE); -+ enqueue_task(rq, p, ENQUEUE_RESTORE); -+ if (new_static < old_static) -+ try_preempt(p, rq); -+ } else if (task_running(rq, p)) { -+ set_rq_task(rq, p); -+ if (old_static < new_static) -+ resched_task(p); -+ } -+out_unlock: -+ task_rq_unlock(rq, p, &rf); -+} -+EXPORT_SYMBOL(set_user_nice); -+ -+/* -+ * can_nice - check if a task can reduce its nice value -+ * @p: task -+ * @nice: nice value -+ */ -+int can_nice(const struct task_struct *p, const int nice) -+{ -+ /* Convert nice value [19,-20] to rlimit style value [1,40] */ -+ int nice_rlim = nice_to_rlimit(nice); -+ -+ return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) || -+ capable(CAP_SYS_NICE)); -+} -+ -+#ifdef __ARCH_WANT_SYS_NICE -+ -+/* -+ * sys_nice - change the priority of the current process. -+ * @increment: priority increment -+ * -+ * sys_setpriority is a more generic, but much slower function that -+ * does similar things. -+ */ -+SYSCALL_DEFINE1(nice, int, increment) -+{ -+ long nice, retval; -+ -+ /* -+ * Setpriority might change our priority at the same moment. -+ * We don't have to worry. Conceptually one call occurs first -+ * and we have a single winner. -+ */ -+ -+ increment = clamp(increment, -NICE_WIDTH, NICE_WIDTH); -+ nice = task_nice(current) + increment; -+ -+ nice = clamp_val(nice, MIN_NICE, MAX_NICE); -+ if (increment < 0 && !can_nice(current, nice)) -+ return -EPERM; -+ -+ retval = security_task_setnice(current, nice); -+ if (retval) -+ return retval; -+ -+ set_user_nice(current, nice); -+ return 0; -+} -+ -+#endif -+ -+/** -+ * task_prio - return the priority value of a given task. -+ * @p: the task in question. -+ * -+ * Return: The priority value as seen by users in /proc. -+ * RT tasks are offset by -100. 
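Illustration (not part of the patch): can_nice() above compares the requested nice value against RLIMIT_NICE after converting nice values [19..-20] to the rlimit-style range [1..40] (i.e. 20 - nice), with CAP_SYS_NICE as an override. A tiny sketch of that check, with the capability bit passed in as a plain flag for clarity:

#include <stdio.h>

/* Nice values [19..-20] map onto rlimit-style values [1..40]. */
static int nice_to_rlimit_style(int nice) { return 20 - nice; }

static int can_nice_sketch(int wanted_nice, unsigned long rlimit_nice,
                           int has_cap_sys_nice)
{
    return nice_to_rlimit_style(wanted_nice) <= (int)rlimit_nice ||
           has_cap_sys_nice;
}

int main(void)
{
    /* RLIMIT_NICE of 30 permits nice values down to -10 without CAP_SYS_NICE */
    for (int nice = 0; nice >= -20; nice -= 5)
        printf("nice %3d allowed: %s\n", nice,
               can_nice_sketch(nice, 30, 0) ? "yes" : "no");
    return 0;
}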
Normal tasks are centered around 1, value goes -+ * from 0 (SCHED_ISO) up to 82 (nice +19 SCHED_IDLEPRIO). -+ */ -+int task_prio(const struct task_struct *p) -+{ -+ int delta, prio = p->prio - MAX_RT_PRIO; -+ -+ /* rt tasks and iso tasks */ -+ if (prio <= 0) -+ goto out; -+ -+ /* Convert to ms to avoid overflows */ -+ delta = NS_TO_MS(p->deadline - task_rq(p)->niffies); -+ if (unlikely(delta < 0)) -+ delta = 0; -+ delta = delta * 40 / ms_longest_deadline_diff(); -+ if (delta <= 80) -+ prio += delta; -+ if (idleprio_task(p)) -+ prio += 40; -+out: -+ return prio; -+} -+ -+/** -+ * idle_cpu - is a given CPU idle currently? -+ * @cpu: the processor in question. -+ * -+ * Return: 1 if the CPU is currently idle. 0 otherwise. -+ */ -+int idle_cpu(int cpu) -+{ -+ return cpu_curr(cpu) == cpu_rq(cpu)->idle; -+} -+ -+/** -+ * available_idle_cpu - is a given CPU idle for enqueuing work. -+ * @cpu: the CPU in question. -+ * -+ * Return: 1 if the CPU is currently idle. 0 otherwise. -+ */ -+int available_idle_cpu(int cpu) -+{ -+ if (!idle_cpu(cpu)) -+ return 0; -+ -+ if (vcpu_is_preempted(cpu)) -+ return 0; -+ -+ return 1; -+} -+ -+/** -+ * idle_task - return the idle task for a given CPU. -+ * @cpu: the processor in question. -+ * -+ * Return: The idle task for the CPU @cpu. -+ */ -+struct task_struct *idle_task(int cpu) -+{ -+ return cpu_rq(cpu)->idle; -+} -+ -+/** -+ * find_process_by_pid - find a process with a matching PID value. -+ * @pid: the pid in question. -+ * -+ * The task of @pid, if found. %NULL otherwise. -+ */ -+static inline struct task_struct *find_process_by_pid(pid_t pid) -+{ -+ return pid ? find_task_by_vpid(pid) : current; -+} -+ -+/* Actually do priority change: must hold rq lock. */ -+static void __setscheduler(struct task_struct *p, struct rq *rq, int policy, -+ int prio, const struct sched_attr *attr, -+ bool keep_boost) -+{ -+ int oldrtprio, oldprio; -+ -+ /* -+ * If params can't change scheduling class changes aren't allowed -+ * either. -+ */ -+ if (attr->sched_flags & SCHED_FLAG_KEEP_PARAMS) -+ return; -+ -+ p->policy = policy; -+ oldrtprio = p->rt_priority; -+ p->rt_priority = prio; -+ p->normal_prio = normal_prio(p); -+ oldprio = p->prio; -+ /* -+ * Keep a potential priority boosting if called from -+ * sched_setscheduler(). 
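Illustration (not part of the patch): for normal tasks, task_prio() above converts how far the virtual deadline lies in the future into the extra 0..40 that shows up in /proc (delta * 40 / ms_longest_deadline_diff(), capped, with idleprio tasks offset a further 40). A sketch of just that arithmetic, with an assumed longest deadline offset of 1000 ms in place of ms_longest_deadline_diff():

#include <stdio.h>

#define LONGEST_DEADLINE_MS 1000   /* assumed stand-in for ms_longest_deadline_diff() */

/* Map how far a task's virtual deadline lies in the future onto the 0..40
 * band added on top of the base priority reported in /proc. */
static int deadline_to_prio_delta(long deadline_ms_ahead)
{
    long delta;

    if (deadline_ms_ahead < 0)
        deadline_ms_ahead = 0;
    delta = deadline_ms_ahead * 40 / LONGEST_DEADLINE_MS;
    return delta <= 80 ? (int)delta : 0;   /* out-of-range deltas add nothing */
}

int main(void)
{
    long ahead[] = { 0, 250, 500, 1000 };
    for (int i = 0; i < 4; i++)
        printf("deadline %4ld ms ahead -> prio delta %d\n",
               ahead[i], deadline_to_prio_delta(ahead[i]));
    return 0;
}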
-+ */ -+ p->prio = normal_prio(p); -+ if (keep_boost) -+ p->prio = rt_effective_prio(p, p->prio); -+ -+ if (task_running(rq, p)) { -+ set_rq_task(rq, p); -+ resched_task(p); -+ } else if (task_queued(p)) { -+ dequeue_task(rq, p, DEQUEUE_SAVE); -+ enqueue_task(rq, p, ENQUEUE_RESTORE); -+ if (p->prio < oldprio || p->rt_priority > oldrtprio) -+ try_preempt(p, rq); -+ } -+} -+ -+/* -+ * Check the target process has a UID that matches the current process's -+ */ -+static bool check_same_owner(struct task_struct *p) -+{ -+ const struct cred *cred = current_cred(), *pcred; -+ bool match; -+ -+ rcu_read_lock(); -+ pcred = __task_cred(p); -+ match = (uid_eq(cred->euid, pcred->euid) || -+ uid_eq(cred->euid, pcred->uid)); -+ rcu_read_unlock(); -+ return match; -+} -+ -+static int __sched_setscheduler(struct task_struct *p, -+ const struct sched_attr *attr, -+ bool user, bool pi) -+{ -+ int retval, policy = attr->sched_policy, oldpolicy = -1, priority = attr->sched_priority; -+ unsigned long rlim_rtprio = 0; -+ struct rq_flags rf; -+ int reset_on_fork; -+ struct rq *rq; -+ -+ /* The pi code expects interrupts enabled */ -+ BUG_ON(pi && in_interrupt()); -+ -+ if (is_rt_policy(policy) && !capable(CAP_SYS_NICE)) { -+ unsigned long lflags; -+ -+ if (!lock_task_sighand(p, &lflags)) -+ return -ESRCH; -+ rlim_rtprio = task_rlimit(p, RLIMIT_RTPRIO); -+ unlock_task_sighand(p, &lflags); -+ if (rlim_rtprio) -+ goto recheck; -+ /* -+ * If the caller requested an RT policy without having the -+ * necessary rights, we downgrade the policy to SCHED_ISO. -+ * We also set the parameter to zero to pass the checks. -+ */ -+ policy = SCHED_ISO; -+ priority = 0; -+ } -+recheck: -+ /* Double check policy once rq lock held */ -+ if (policy < 0) { -+ reset_on_fork = p->sched_reset_on_fork; -+ policy = oldpolicy = p->policy; -+ } else { -+ reset_on_fork = !!(policy & SCHED_RESET_ON_FORK); -+ policy &= ~SCHED_RESET_ON_FORK; -+ -+ if (!SCHED_RANGE(policy)) -+ return -EINVAL; -+ } -+ -+ if (attr->sched_flags & ~(SCHED_FLAG_ALL | SCHED_FLAG_SUGOV)) -+ return -EINVAL; -+ -+ /* -+ * Valid priorities for SCHED_FIFO and SCHED_RR are -+ * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL and -+ * SCHED_BATCH is 0. 
-+ */ -+ if (priority < 0 || -+ (p->mm && priority > MAX_USER_RT_PRIO - 1) || -+ (!p->mm && priority > MAX_RT_PRIO - 1)) -+ return -EINVAL; -+ if (is_rt_policy(policy) != (priority != 0)) -+ return -EINVAL; -+ -+ /* -+ * Allow unprivileged RT tasks to decrease priority: -+ */ -+ if (user && !capable(CAP_SYS_NICE)) { -+ if (is_rt_policy(policy)) { -+ unsigned long rlim_rtprio = -+ task_rlimit(p, RLIMIT_RTPRIO); -+ -+ /* Can't set/change the rt policy */ -+ if (policy != p->policy && !rlim_rtprio) -+ return -EPERM; -+ -+ /* Can't increase priority */ -+ if (priority > p->rt_priority && -+ priority > rlim_rtprio) -+ return -EPERM; -+ } else { -+ switch (p->policy) { -+ /* -+ * Can only downgrade policies but not back to -+ * SCHED_NORMAL -+ */ -+ case SCHED_ISO: -+ if (policy == SCHED_ISO) -+ goto out; -+ if (policy != SCHED_NORMAL) -+ return -EPERM; -+ break; -+ case SCHED_BATCH: -+ if (policy == SCHED_BATCH) -+ goto out; -+ if (policy != SCHED_IDLEPRIO) -+ return -EPERM; -+ break; -+ case SCHED_IDLEPRIO: -+ if (policy == SCHED_IDLEPRIO) -+ goto out; -+ return -EPERM; -+ default: -+ break; -+ } -+ } -+ -+ /* Can't change other user's priorities */ -+ if (!check_same_owner(p)) -+ return -EPERM; -+ -+ /* Normal users shall not reset the sched_reset_on_fork flag: */ -+ if (p->sched_reset_on_fork && !reset_on_fork) -+ return -EPERM; -+ } -+ -+ if (user) { -+ retval = security_task_setscheduler(p); -+ if (retval) -+ return retval; -+ } -+ -+ if (pi) -+ cpuset_read_lock(); -+ -+ /* -+ * Make sure no PI-waiters arrive (or leave) while we are -+ * changing the priority of the task: -+ * -+ * To be able to change p->policy safely, the runqueue lock must be -+ * held. -+ */ -+ rq = task_rq_lock(p, &rf); -+ update_rq_clock(rq); -+ -+ /* -+ * Changing the policy of the stop threads its a very bad idea: -+ */ -+ if (p == rq->stop) { -+ retval = -EINVAL; -+ goto unlock; -+ } -+ -+ /* -+ * If not changing anything there's no need to proceed further: -+ */ -+ if (unlikely(policy == p->policy && (!is_rt_policy(policy) || -+ priority == p->rt_priority))) { -+ retval = 0; -+ goto unlock; -+ } -+ -+ /* Re-check policy now with rq lock held */ -+ if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { -+ policy = oldpolicy = -1; -+ task_rq_unlock(rq, p, &rf); -+ if (pi) -+ cpuset_read_unlock(); -+ goto recheck; -+ } -+ p->sched_reset_on_fork = reset_on_fork; -+ -+ __setscheduler(p, rq, policy, priority, attr, pi); -+ -+ /* Avoid rq from going away on us: */ -+ preempt_disable(); -+ task_rq_unlock(rq, p, &rf); -+ -+ if (pi) { -+ cpuset_read_unlock(); -+ rt_mutex_adjust_pi(p); -+ } -+ preempt_enable(); -+out: -+ return 0; -+ -+unlock: -+ task_rq_unlock(rq, p, &rf); -+ if (pi) -+ cpuset_read_unlock(); -+ return retval; -+} -+ -+static int _sched_setscheduler(struct task_struct *p, int policy, -+ const struct sched_param *param, bool check) -+{ -+ struct sched_attr attr = { -+ .sched_policy = policy, -+ .sched_priority = param->sched_priority, -+ .sched_nice = PRIO_TO_NICE(p->static_prio), -+ }; -+ -+ return __sched_setscheduler(p, &attr, check, true); -+} -+/** -+ * sched_setscheduler - change the scheduling policy and/or RT priority of a thread. -+ * @p: the task in question. -+ * @policy: new policy. -+ * @param: structure containing the new RT priority. -+ * -+ * Return: 0 on success. An error code otherwise. -+ * -+ * NOTE that the task may be already dead. 
-+ */ -+int sched_setscheduler(struct task_struct *p, int policy, -+ const struct sched_param *param) -+{ -+ return _sched_setscheduler(p, policy, param, true); -+} -+ -+EXPORT_SYMBOL_GPL(sched_setscheduler); -+ -+int sched_setattr(struct task_struct *p, const struct sched_attr *attr) -+{ -+ return __sched_setscheduler(p, attr, true, true); -+} -+EXPORT_SYMBOL_GPL(sched_setattr); -+ -+int sched_setattr_nocheck(struct task_struct *p, const struct sched_attr *attr) -+{ -+ return __sched_setscheduler(p, attr, false, true); -+} -+ -+/** -+ * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace. -+ * @p: the task in question. -+ * @policy: new policy. -+ * @param: structure containing the new RT priority. -+ * -+ * Just like sched_setscheduler, only don't bother checking if the -+ * current context has permission. For example, this is needed in -+ * stop_machine(): we create temporary high priority worker threads, -+ * but our caller might not have that capability. -+ * -+ * Return: 0 on success. An error code otherwise. -+ */ -+int sched_setscheduler_nocheck(struct task_struct *p, int policy, -+ const struct sched_param *param) -+{ -+ return _sched_setscheduler(p, policy, param, false); -+} -+EXPORT_SYMBOL_GPL(sched_setscheduler_nocheck); -+ -+static int -+do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) -+{ -+ struct sched_param lparam; -+ struct task_struct *p; -+ int retval; -+ -+ if (!param || pid < 0) -+ return -EINVAL; -+ if (copy_from_user(&lparam, param, sizeof(struct sched_param))) -+ return -EFAULT; -+ -+ rcu_read_lock(); -+ retval = -ESRCH; -+ p = find_process_by_pid(pid); -+ if (likely(p)) -+ get_task_struct(p); -+ rcu_read_unlock(); -+ -+ if (likely(p)) { -+ retval = sched_setscheduler(p, policy, &lparam); -+ put_task_struct(p); -+ } -+ -+ return retval; -+} -+ -+/* -+ * Mimics kernel/events/core.c perf_copy_attr(). -+ */ -+static int sched_copy_attr(struct sched_attr __user *uattr, -+ struct sched_attr *attr) -+{ -+ u32 size; -+ int ret; -+ -+ /* Zero the full structure, so that a short copy will be nice: */ -+ memset(attr, 0, sizeof(*attr)); -+ -+ ret = get_user(size, &uattr->size); -+ if (ret) -+ return ret; -+ -+ /* ABI compatibility quirk: */ -+ if (!size) -+ size = SCHED_ATTR_SIZE_VER0; -+ -+ if (size < SCHED_ATTR_SIZE_VER0 || size > PAGE_SIZE) -+ goto err_size; -+ -+ ret = copy_struct_from_user(attr, sizeof(*attr), uattr, size); -+ if (ret) { -+ if (ret == -E2BIG) -+ goto err_size; -+ return ret; -+ } -+ -+ /* -+ * XXX: Do we want to be lenient like existing syscalls; or do we want -+ * to be strict and return an error on out-of-bounds values? -+ */ -+ attr->sched_nice = clamp(attr->sched_nice, -20, 19); -+ -+ /* sched/core.c uses zero here but we already know ret is zero */ -+ return 0; -+ -+err_size: -+ put_user(sizeof(*attr), &uattr->size); -+ return -E2BIG; -+} -+ -+/* -+ * sched_setparam() passes in -1 for its policy, to let the functions -+ * it calls know not to change it. -+ */ -+#define SETPARAM_POLICY -1 -+ -+/** -+ * sys_sched_setscheduler - set/change the scheduler policy and RT priority -+ * @pid: the pid in question. -+ * @policy: new policy. -+ * @param: structure containing the new RT priority. -+ * -+ * Return: 0 on success. An error code otherwise. 
-+ */ -+SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy, struct sched_param __user *, param) -+{ -+ if (policy < 0) -+ return -EINVAL; -+ -+ return do_sched_setscheduler(pid, policy, param); -+} -+ -+/** -+ * sys_sched_setparam - set/change the RT priority of a thread -+ * @pid: the pid in question. -+ * @param: structure containing the new RT priority. -+ * -+ * Return: 0 on success. An error code otherwise. -+ */ -+SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param) -+{ -+ return do_sched_setscheduler(pid, SETPARAM_POLICY, param); -+} -+ -+/** -+ * sys_sched_setattr - same as above, but with extended sched_attr -+ * @pid: the pid in question. -+ * @uattr: structure containing the extended parameters. -+ */ -+SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr, -+ unsigned int, flags) -+{ -+ struct sched_attr attr; -+ struct task_struct *p; -+ int retval; -+ -+ if (!uattr || pid < 0 || flags) -+ return -EINVAL; -+ -+ retval = sched_copy_attr(uattr, &attr); -+ if (retval) -+ return retval; -+ -+ if ((int)attr.sched_policy < 0) -+ return -EINVAL; -+ if (attr.sched_flags & SCHED_FLAG_KEEP_POLICY) -+ attr.sched_policy = SETPARAM_POLICY; -+ -+ rcu_read_lock(); -+ retval = -ESRCH; -+ p = find_process_by_pid(pid); -+ if (likely(p)) -+ get_task_struct(p); -+ rcu_read_unlock(); -+ -+ if (likely(p)) { -+ retval = sched_setattr(p, &attr); -+ put_task_struct(p); -+ } -+ -+ return retval; -+} -+ -+/** -+ * sys_sched_getscheduler - get the policy (scheduling class) of a thread -+ * @pid: the pid in question. -+ * -+ * Return: On success, the policy of the thread. Otherwise, a negative error -+ * code. -+ */ -+SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid) -+{ -+ struct task_struct *p; -+ int retval = -EINVAL; -+ -+ if (pid < 0) -+ goto out_nounlock; -+ -+ retval = -ESRCH; -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ if (p) { -+ retval = security_task_getscheduler(p); -+ if (!retval) -+ retval = p->policy; -+ } -+ rcu_read_unlock(); -+ -+out_nounlock: -+ return retval; -+} -+ -+/** -+ * sys_sched_getscheduler - get the RT priority of a thread -+ * @pid: the pid in question. -+ * @param: structure containing the RT priority. -+ * -+ * Return: On success, 0 and the RT priority is in @param. Otherwise, an error -+ * code. -+ */ -+SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) -+{ -+ struct sched_param lp = { .sched_priority = 0 }; -+ struct task_struct *p; -+ int retval = -EINVAL; -+ -+ if (!param || pid < 0) -+ goto out_nounlock; -+ -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ retval = -ESRCH; -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ if (has_rt_policy(p)) -+ lp.sched_priority = p->rt_priority; -+ rcu_read_unlock(); -+ -+ /* -+ * This one might sleep, we cannot do it with a spinlock held ... -+ */ -+ retval = copy_to_user(param, &lp, sizeof(*param)) ? -EFAULT : 0; -+ -+out_nounlock: -+ return retval; -+ -+out_unlock: -+ rcu_read_unlock(); -+ return retval; -+} -+ -+/* -+ * Copy the kernel size attribute structure (which might be larger -+ * than what user-space knows about) to user-space. -+ * -+ * Note that all cases are valid: user-space buffer can be larger or -+ * smaller than the kernel-space buffer. The usual case is that both -+ * have the same size. 
-+ */ -+static int -+sched_attr_copy_to_user(struct sched_attr __user *uattr, -+ struct sched_attr *kattr, -+ unsigned int usize) -+{ -+ unsigned int ksize = sizeof(*kattr); -+ -+ if (!access_ok(uattr, usize)) -+ return -EFAULT; -+ -+ /* -+ * sched_getattr() ABI forwards and backwards compatibility: -+ * -+ * If usize == ksize then we just copy everything to user-space and all is good. -+ * -+ * If usize < ksize then we only copy as much as user-space has space for, -+ * this keeps ABI compatibility as well. We skip the rest. -+ * -+ * If usize > ksize then user-space is using a newer version of the ABI, -+ * which part the kernel doesn't know about. Just ignore it - tooling can -+ * detect the kernel's knowledge of attributes from the attr->size value -+ * which is set to ksize in this case. -+ */ -+ kattr->size = min(usize, ksize); -+ -+ if (copy_to_user(uattr, kattr, kattr->size)) -+ return -EFAULT; -+ -+ return 0; -+} -+ -+/** -+ * sys_sched_getattr - similar to sched_getparam, but with sched_attr -+ * @pid: the pid in question. -+ * @uattr: structure containing the extended parameters. -+ * @usize: sizeof(attr) for fwd/bwd comp. -+ * @flags: for future extension. -+ */ -+SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, -+ unsigned int, usize, unsigned int, flags) -+{ -+ struct sched_attr kattr = { }; -+ struct task_struct *p; -+ int retval; -+ -+ if (!uattr || pid < 0 || usize > PAGE_SIZE || -+ usize < SCHED_ATTR_SIZE_VER0 || flags) -+ return -EINVAL; -+ -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ retval = -ESRCH; -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ kattr.sched_policy = p->policy; -+ if (rt_task(p)) -+ kattr.sched_priority = p->rt_priority; -+ else -+ kattr.sched_nice = task_nice(p); -+ -+ rcu_read_unlock(); -+ -+ return sched_attr_copy_to_user(uattr, &kattr, usize); -+ -+out_unlock: -+ rcu_read_unlock(); -+ return retval; -+} -+ -+long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) -+{ -+ cpumask_var_t cpus_allowed, new_mask; -+ struct task_struct *p; -+ int retval; -+ -+ rcu_read_lock(); -+ -+ p = find_process_by_pid(pid); -+ if (!p) { -+ rcu_read_unlock(); -+ return -ESRCH; -+ } -+ -+ /* Prevent p going away */ -+ get_task_struct(p); -+ rcu_read_unlock(); -+ -+ if (p->flags & PF_NO_SETAFFINITY) { -+ retval = -EINVAL; -+ goto out_put_task; -+ } -+ if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { -+ retval = -ENOMEM; -+ goto out_put_task; -+ } -+ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) { -+ retval = -ENOMEM; -+ goto out_free_cpus_allowed; -+ } -+ retval = -EPERM; -+ if (!check_same_owner(p)) { -+ rcu_read_lock(); -+ if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) { -+ rcu_read_unlock(); -+ goto out_unlock; -+ } -+ rcu_read_unlock(); -+ } -+ -+ retval = security_task_setscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ cpuset_cpus_allowed(p, cpus_allowed); -+ cpumask_and(new_mask, in_mask, cpus_allowed); -+again: -+ retval = __set_cpus_allowed_ptr(p, new_mask, true); -+ -+ if (!retval) { -+ cpuset_cpus_allowed(p, cpus_allowed); -+ if (!cpumask_subset(new_mask, cpus_allowed)) { -+ /* -+ * We must have raced with a concurrent cpuset -+ * update. 
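Illustration (not part of the patch): sched_attr_copy_to_user() above handles ABI growth by copying min(usize, ksize) bytes and writing that size back into ->size, so older callers get a truncated-but-valid structure and newer callers can tell how much the producer knew about. A userspace sketch of the same size-negotiation idea, using a hypothetical attr_v2 structure rather than the real struct sched_attr:

#include <stdio.h>
#include <string.h>

/* Hypothetical "kernel" view of an extensible attribute structure. */
struct attr_v2 { unsigned int size; int policy; int nice; int extra; };

/* Copy at most what the (possibly older or newer) caller has room for,
 * and report in ->size how much was actually produced. */
static size_t copy_attr(void *ubuf, size_t usize, struct attr_v2 *kattr)
{
    size_t ksize = sizeof(*kattr);
    size_t n = usize < ksize ? usize : ksize;

    kattr->size = (unsigned int)n;
    memcpy(ubuf, kattr, n);
    return n;
}

int main(void)
{
    struct attr_v2 kattr = { 0, 3, -5, 42 };
    unsigned char small[8], big[64];

    printf("old caller got %zu bytes\n", copy_attr(small, sizeof(small), &kattr));
    printf("new caller got %zu bytes\n", copy_attr(big, sizeof(big), &kattr));
    return 0;
}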
Just reset the cpus_allowed to the -+ * cpuset's cpus_allowed -+ */ -+ cpumask_copy(new_mask, cpus_allowed); -+ goto again; -+ } -+ } -+out_unlock: -+ free_cpumask_var(new_mask); -+out_free_cpus_allowed: -+ free_cpumask_var(cpus_allowed); -+out_put_task: -+ put_task_struct(p); -+ return retval; -+} -+ -+static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, -+ cpumask_t *new_mask) -+{ -+ if (len < cpumask_size()) -+ cpumask_clear(new_mask); -+ else if (len > cpumask_size()) -+ len = cpumask_size(); -+ -+ return copy_from_user(new_mask, user_mask_ptr, len) ? -EFAULT : 0; -+} -+ -+ -+/** -+ * sys_sched_setaffinity - set the CPU affinity of a process -+ * @pid: pid of the process -+ * @len: length in bytes of the bitmask pointed to by user_mask_ptr -+ * @user_mask_ptr: user-space pointer to the new CPU mask -+ * -+ * Return: 0 on success. An error code otherwise. -+ */ -+SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len, -+ unsigned long __user *, user_mask_ptr) -+{ -+ cpumask_var_t new_mask; -+ int retval; -+ -+ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) -+ return -ENOMEM; -+ -+ retval = get_user_cpu_mask(user_mask_ptr, len, new_mask); -+ if (retval == 0) -+ retval = sched_setaffinity(pid, new_mask); -+ free_cpumask_var(new_mask); -+ return retval; -+} -+ -+long sched_getaffinity(pid_t pid, cpumask_t *mask) -+{ -+ struct task_struct *p; -+ unsigned long flags; -+ int retval; -+ -+ get_online_cpus(); -+ rcu_read_lock(); -+ -+ retval = -ESRCH; -+ p = find_process_by_pid(pid); -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ cpumask_and(mask, &p->cpus_mask, cpu_active_mask); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ -+out_unlock: -+ rcu_read_unlock(); -+ put_online_cpus(); -+ -+ return retval; -+} -+ -+/** -+ * sys_sched_getaffinity - get the CPU affinity of a process -+ * @pid: pid of the process -+ * @len: length in bytes of the bitmask pointed to by user_mask_ptr -+ * @user_mask_ptr: user-space pointer to hold the current CPU mask -+ * -+ * Return: 0 on success. An error code otherwise. -+ */ -+SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, -+ unsigned long __user *, user_mask_ptr) -+{ -+ int ret; -+ cpumask_var_t mask; -+ -+ if ((len * BITS_PER_BYTE) < nr_cpu_ids) -+ return -EINVAL; -+ if (len & (sizeof(unsigned long)-1)) -+ return -EINVAL; -+ -+ if (!alloc_cpumask_var(&mask, GFP_KERNEL)) -+ return -ENOMEM; -+ -+ ret = sched_getaffinity(pid, mask); -+ if (ret == 0) { -+ unsigned int retlen = min(len, cpumask_size()); -+ -+ if (copy_to_user(user_mask_ptr, mask, retlen)) -+ ret = -EFAULT; -+ else -+ ret = retlen; -+ } -+ free_cpumask_var(mask); -+ -+ return ret; -+} -+ -+/** -+ * sys_sched_yield - yield the current processor to other threads. -+ * -+ * This function yields the current CPU to other tasks. It does this by -+ * scheduling away the current task. If it still has the earliest deadline -+ * it will be scheduled again as the next task. -+ * -+ * Return: 0. 
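Illustration (not part of the patch): from userspace, the affinity syscalls implemented above are normally reached through the glibc wrappers and the CPU_SET() macros. A minimal usage example that pins the caller to CPU 0 and reads the mask back:

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
    cpu_set_t mask;

    /* Pin the calling process to CPU 0, then read the mask back. */
    CPU_ZERO(&mask);
    CPU_SET(0, &mask);
    if (sched_setaffinity(0, sizeof(mask), &mask) != 0) {
        perror("sched_setaffinity");
        return 1;
    }

    CPU_ZERO(&mask);
    if (sched_getaffinity(0, sizeof(mask), &mask) != 0) {
        perror("sched_getaffinity");
        return 1;
    }
    printf("CPU 0 in mask: %s\n", CPU_ISSET(0, &mask) ? "yes" : "no");
    return 0;
}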
-+ */ -+static void do_sched_yield(void) -+{ -+ struct rq *rq; -+ -+ if (!sched_yield_type) -+ return; -+ -+ local_irq_disable(); -+ rq = this_rq(); -+ rq_lock(rq); -+ -+ if (sched_yield_type > 1) -+ time_slice_expired(current, rq); -+ schedstat_inc(rq->yld_count); -+ -+ /* -+ * Since we are going to call schedule() anyway, there's -+ * no need to preempt or enable interrupts: -+ */ -+ preempt_disable(); -+ rq_unlock(rq); -+ sched_preempt_enable_no_resched(); -+ -+ schedule(); -+} -+ -+SYSCALL_DEFINE0(sched_yield) -+{ -+ do_sched_yield(); -+ return 0; -+} -+ -+#ifndef CONFIG_PREEMPTION -+int __sched _cond_resched(void) -+{ -+ if (should_resched(0)) { -+ preempt_schedule_common(); -+ return 1; -+ } -+ rcu_all_qs(); -+ return 0; -+} -+EXPORT_SYMBOL(_cond_resched); -+#endif -+ -+/* -+ * __cond_resched_lock() - if a reschedule is pending, drop the given lock, -+ * call schedule, and on return reacquire the lock. -+ * -+ * This works OK both with and without CONFIG_PREEMPTION. We do strange low-level -+ * operations here to prevent schedule() from being called twice (once via -+ * spin_unlock(), once by hand). -+ */ -+int __cond_resched_lock(spinlock_t *lock) -+{ -+ int resched = should_resched(PREEMPT_LOCK_OFFSET); -+ int ret = 0; -+ -+ lockdep_assert_held(lock); -+ -+ if (spin_needbreak(lock) || resched) { -+ spin_unlock(lock); -+ if (resched) -+ preempt_schedule_common(); -+ else -+ cpu_relax(); -+ ret = 1; -+ spin_lock(lock); -+ } -+ return ret; -+} -+EXPORT_SYMBOL(__cond_resched_lock); -+ -+/** -+ * yield - yield the current processor to other threads. -+ * -+ * Do not ever use this function, there's a 99% chance you're doing it wrong. -+ * -+ * The scheduler is at all times free to pick the calling task as the most -+ * eligible task to run, if removing the yield() call from your code breaks -+ * it, its already broken. -+ * -+ * Typical broken usage is: -+ * -+ * while (!event) -+ * yield(); -+ * -+ * where one assumes that yield() will let 'the other' process run that will -+ * make event true. If the current task is a SCHED_FIFO task that will never -+ * happen. Never use yield() as a progress guarantee!! -+ * -+ * If you want to use yield() to wait for something, use wait_event(). -+ * If you want to use yield() to be 'nice' for others, use cond_resched(). -+ * If you still want to use yield(), do not! -+ */ -+void __sched yield(void) -+{ -+ set_current_state(TASK_RUNNING); -+ do_sched_yield(); -+} -+EXPORT_SYMBOL(yield); -+ -+/** -+ * yield_to - yield the current processor to another thread in -+ * your thread group, or accelerate that thread toward the -+ * processor it's on. -+ * @p: target task -+ * @preempt: whether task preemption is allowed or not -+ * -+ * It's the caller's job to ensure that the target task struct -+ * can't go away on us before we can do any checks. -+ * -+ * Return: -+ * true (>0) if we indeed boosted the target task. -+ * false (0) if we failed to boost the target. -+ * -ESRCH if there's no task to yield to. -+ */ -+int __sched yield_to(struct task_struct *p, bool preempt) -+{ -+ struct task_struct *rq_p; -+ struct rq *rq, *p_rq; -+ unsigned long flags; -+ int yielded = 0; -+ -+ local_irq_save(flags); -+ rq = this_rq(); -+ -+again: -+ p_rq = task_rq(p); -+ /* -+ * If we're the only runnable task on the rq and target rq also -+ * has only one task, there's absolutely no point in yielding. 
-+ */ -+ if (task_running(p_rq, p) || p->state) { -+ yielded = -ESRCH; -+ goto out_irq; -+ } -+ -+ double_rq_lock(rq, p_rq); -+ if (unlikely(task_rq(p) != p_rq)) { -+ double_rq_unlock(rq, p_rq); -+ goto again; -+ } -+ -+ yielded = 1; -+ schedstat_inc(rq->yld_count); -+ rq_p = rq->curr; -+ if (p->deadline > rq_p->deadline) -+ p->deadline = rq_p->deadline; -+ p->time_slice += rq_p->time_slice; -+ if (p->time_slice > timeslice()) -+ p->time_slice = timeslice(); -+ time_slice_expired(rq_p, rq); -+ if (preempt && rq != p_rq) -+ resched_task(p_rq->curr); -+ double_rq_unlock(rq, p_rq); -+out_irq: -+ local_irq_restore(flags); -+ -+ if (yielded > 0) -+ schedule(); -+ return yielded; -+} -+EXPORT_SYMBOL_GPL(yield_to); -+ -+int io_schedule_prepare(void) -+{ -+ int old_iowait = current->in_iowait; -+ -+ current->in_iowait = 1; -+ blk_schedule_flush_plug(current); -+ -+ return old_iowait; -+} -+ -+void io_schedule_finish(int token) -+{ -+ current->in_iowait = token; -+} -+ -+/* -+ * This task is about to go to sleep on IO. Increment rq->nr_iowait so -+ * that process accounting knows that this is a task in IO wait state. -+ * -+ * But don't do that if it is a deliberate, throttling IO wait (this task -+ * has set its backing_dev_info: the queue against which it should throttle) -+ */ -+ -+long __sched io_schedule_timeout(long timeout) -+{ -+ int token; -+ long ret; -+ -+ token = io_schedule_prepare(); -+ ret = schedule_timeout(timeout); -+ io_schedule_finish(token); -+ -+ return ret; -+} -+EXPORT_SYMBOL(io_schedule_timeout); -+ -+void __sched io_schedule(void) -+{ -+ int token; -+ -+ token = io_schedule_prepare(); -+ schedule(); -+ io_schedule_finish(token); -+} -+EXPORT_SYMBOL(io_schedule); -+ -+/** -+ * sys_sched_get_priority_max - return maximum RT priority. -+ * @policy: scheduling class. -+ * -+ * Return: On success, this syscall returns the maximum -+ * rt_priority that can be used by a given scheduling class. -+ * On failure, a negative error code is returned. -+ */ -+SYSCALL_DEFINE1(sched_get_priority_max, int, policy) -+{ -+ int ret = -EINVAL; -+ -+ switch (policy) { -+ case SCHED_FIFO: -+ case SCHED_RR: -+ ret = MAX_USER_RT_PRIO-1; -+ break; -+ case SCHED_NORMAL: -+ case SCHED_BATCH: -+ case SCHED_ISO: -+ case SCHED_IDLEPRIO: -+ ret = 0; -+ break; -+ } -+ return ret; -+} -+ -+/** -+ * sys_sched_get_priority_min - return minimum RT priority. -+ * @policy: scheduling class. -+ * -+ * Return: On success, this syscall returns the minimum -+ * rt_priority that can be used by a given scheduling class. -+ * On failure, a negative error code is returned. -+ */ -+SYSCALL_DEFINE1(sched_get_priority_min, int, policy) -+{ -+ int ret = -EINVAL; -+ -+ switch (policy) { -+ case SCHED_FIFO: -+ case SCHED_RR: -+ ret = 1; -+ break; -+ case SCHED_NORMAL: -+ case SCHED_BATCH: -+ case SCHED_ISO: -+ case SCHED_IDLEPRIO: -+ ret = 0; -+ break; -+ } -+ return ret; -+} -+ -+static int sched_rr_get_interval(pid_t pid, struct timespec64 *t) -+{ -+ struct task_struct *p; -+ unsigned int time_slice; -+ struct rq_flags rf; -+ struct rq *rq; -+ int retval; -+ -+ if (pid < 0) -+ return -EINVAL; -+ -+ retval = -ESRCH; -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ rq = task_rq_lock(p, &rf); -+ time_slice = p->policy == SCHED_FIFO ? 
0 : MS_TO_NS(task_timeslice(p)); -+ task_rq_unlock(rq, p, &rf); -+ -+ rcu_read_unlock(); -+ *t = ns_to_timespec64(time_slice); -+ return 0; -+ -+out_unlock: -+ rcu_read_unlock(); -+ return retval; -+} -+ -+/** -+ * sys_sched_rr_get_interval - return the default timeslice of a process. -+ * @pid: pid of the process. -+ * @interval: userspace pointer to the timeslice value. -+ * -+ * this syscall writes the default timeslice value of a given process -+ * into the user-space timespec buffer. A value of '0' means infinity. -+ * -+ * Return: On success, 0 and the timeslice is in @interval. Otherwise, -+ * an error code. -+ */ -+SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, -+ struct __kernel_timespec __user *, interval) -+{ -+ struct timespec64 t; -+ int retval = sched_rr_get_interval(pid, &t); -+ -+ if (retval == 0) -+ retval = put_timespec64(&t, interval); -+ -+ return retval; -+} -+ -+#ifdef CONFIG_COMPAT_32BIT_TIME -+SYSCALL_DEFINE2(sched_rr_get_interval_time32, pid_t, pid, -+ struct old_timespec32 __user *, interval) -+{ -+ struct timespec64 t; -+ int retval = sched_rr_get_interval(pid, &t); -+ -+ if (retval == 0) -+ retval = put_old_timespec32(&t, interval); -+ return retval; -+} -+#endif -+ -+void sched_show_task(struct task_struct *p) -+{ -+ unsigned long free = 0; -+ int ppid; -+ -+ if (!try_get_task_stack(p)) -+ return; -+ -+ printk(KERN_INFO "%-15.15s %c", p->comm, task_state_to_char(p)); -+ -+ if (p->state == TASK_RUNNING) -+ printk(KERN_CONT " running task "); -+#ifdef CONFIG_DEBUG_STACK_USAGE -+ free = stack_not_used(p); -+#endif -+ ppid = 0; -+ rcu_read_lock(); -+ if (pid_alive(p)) -+ ppid = task_pid_nr(rcu_dereference(p->real_parent)); -+ rcu_read_unlock(); -+ printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free, -+ task_pid_nr(p), ppid, -+ (unsigned long)task_thread_info(p)->flags); -+ -+ print_worker_info(KERN_INFO, p); -+ show_stack(p, NULL); -+ put_task_stack(p); -+} -+EXPORT_SYMBOL_GPL(sched_show_task); -+ -+static inline bool -+state_filter_match(unsigned long state_filter, struct task_struct *p) -+{ -+ /* no filter, everything matches */ -+ if (!state_filter) -+ return true; -+ -+ /* filter, but doesn't match */ -+ if (!(p->state & state_filter)) -+ return false; -+ -+ /* -+ * When looking for TASK_UNINTERRUPTIBLE skip TASK_IDLE (allows -+ * TASK_KILLABLE). -+ */ -+ if (state_filter == TASK_UNINTERRUPTIBLE && p->state == TASK_IDLE) -+ return false; -+ -+ return true; -+} -+ -+void show_state_filter(unsigned long state_filter) -+{ -+ struct task_struct *g, *p; -+ -+#if BITS_PER_LONG == 32 -+ printk(KERN_INFO -+ " task PC stack pid father\n"); -+#else -+ printk(KERN_INFO -+ " task PC stack pid father\n"); -+#endif -+ rcu_read_lock(); -+ for_each_process_thread(g, p) { -+ /* -+ * reset the NMI-timeout, listing all files on a slow -+ * console might take a lot of time: -+ * Also, reset softlockup watchdogs on all CPUs, because -+ * another CPU might be blocked waiting for us to process -+ * an IPI. 
-+ */ -+ touch_nmi_watchdog(); -+ touch_all_softlockup_watchdogs(); -+ if (state_filter_match(state_filter, p)) -+ sched_show_task(p); -+ } -+ -+ rcu_read_unlock(); -+ /* -+ * Only show locks if all tasks are dumped: -+ */ -+ if (!state_filter) -+ debug_show_all_locks(); -+} -+ -+void dump_cpu_task(int cpu) -+{ -+ pr_info("Task dump for CPU %d:\n", cpu); -+ sched_show_task(cpu_curr(cpu)); -+} -+ -+#ifdef CONFIG_SMP -+void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ cpumask_copy(&p->cpus_mask, new_mask); -+ p->nr_cpus_allowed = cpumask_weight(new_mask); -+} -+ -+void __do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ struct rq *rq = task_rq(p); -+ -+ lockdep_assert_held(&p->pi_lock); -+ -+ cpumask_copy(&p->cpus_mask, new_mask); -+ -+ if (task_queued(p)) { -+ /* -+ * Because __kthread_bind() calls this on blocked tasks without -+ * holding rq->lock. -+ */ -+ lockdep_assert_held(rq->lock); -+ } -+} -+ -+/* -+ * Calling do_set_cpus_allowed from outside the scheduler code should not be -+ * called on a running or queued task. We should be holding pi_lock. -+ */ -+void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ __do_set_cpus_allowed(p, new_mask); -+ if (needs_other_cpu(p, task_cpu(p))) { -+ struct rq *rq; -+ -+ rq = __task_rq_lock(p, NULL); -+ set_task_cpu(p, valid_task_cpu(p)); -+ resched_task(p); -+ __task_rq_unlock(rq, NULL); -+ } -+} -+#endif -+ -+/** -+ * init_idle - set up an idle thread for a given CPU -+ * @idle: task in question -+ * @cpu: cpu the idle task belongs to -+ * -+ * NOTE: this function does not set the idle thread's NEED_RESCHED -+ * flag, to make booting more robust. -+ */ -+void init_idle(struct task_struct *idle, int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ raw_spin_lock_irqsave(&idle->pi_lock, flags); -+ raw_spin_lock(rq->lock); -+ idle->last_ran = rq->niffies; -+ time_slice_expired(idle, rq); -+ idle->state = TASK_RUNNING; -+ /* Setting prio to illegal value shouldn't matter when never queued */ -+ idle->prio = PRIO_LIMIT; -+ -+ kasan_unpoison_task_stack(idle); -+ -+#ifdef CONFIG_SMP -+ /* -+ * It's possible that init_idle() gets called multiple times on a task, -+ * in that case do_set_cpus_allowed() will not do the right thing. -+ * -+ * And since this is boot we can forgo the serialisation. -+ */ -+ set_cpus_allowed_common(idle, cpumask_of(cpu)); -+#ifdef CONFIG_SMT_NICE -+ idle->smt_bias = 0; -+#endif -+#endif -+ set_rq_task(rq, idle); -+ -+ /* Silence PROVE_RCU */ -+ rcu_read_lock(); -+ set_task_cpu(idle, cpu); -+ rcu_read_unlock(); -+ -+ rq->idle = idle; -+ rcu_assign_pointer(rq->curr, idle); -+ idle->on_rq = TASK_ON_RQ_QUEUED; -+ raw_spin_unlock(rq->lock); -+ raw_spin_unlock_irqrestore(&idle->pi_lock, flags); -+ -+ /* Set the preempt count _outside_ the spinlocks! 
*/ -+ init_idle_preempt_count(idle, cpu); -+ -+ ftrace_graph_init_idle_task(idle, cpu); -+ vtime_init_idle(idle, cpu); -+#ifdef CONFIG_SMP -+ sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu); -+#endif -+} -+ -+int cpuset_cpumask_can_shrink(const struct cpumask __maybe_unused *cur, -+ const struct cpumask __maybe_unused *trial) -+{ -+ return 1; -+} -+ -+int task_can_attach(struct task_struct *p, -+ const struct cpumask *cs_cpus_allowed) -+{ -+ int ret = 0; -+ -+ /* -+ * Kthreads which disallow setaffinity shouldn't be moved -+ * to a new cpuset; we don't want to change their CPU -+ * affinity and isolating such threads by their set of -+ * allowed nodes is unnecessary. Thus, cpusets are not -+ * applicable for such threads. This prevents checking for -+ * success of set_cpus_allowed_ptr() on all attached tasks -+ * before cpus_mask may be changed. -+ */ -+ if (p->flags & PF_NO_SETAFFINITY) -+ ret = -EINVAL; -+ -+ return ret; -+} -+ -+void resched_cpu(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ struct rq_flags rf; -+ -+ rq_lock_irqsave(rq, &rf); -+ if (cpu_online(cpu) || cpu == smp_processor_id()) -+ resched_curr(rq); -+ rq_unlock_irqrestore(rq, &rf); -+} -+ -+#ifdef CONFIG_SMP -+#ifdef CONFIG_NO_HZ_COMMON -+void select_nohz_load_balancer(int stop_tick) -+{ -+} -+ -+void set_cpu_sd_state_idle(void) {} -+void nohz_balance_enter_idle(int cpu) {} -+ -+/* -+ * In the semi idle case, use the nearest busy CPU for migrating timers -+ * from an idle CPU. This is good for power-savings. -+ * -+ * We don't do similar optimization for completely idle system, as -+ * selecting an idle CPU will add more delays to the timers than intended -+ * (as that CPU's timer base may not be uptodate wrt jiffies etc). -+ */ -+int get_nohz_timer_target(void) -+{ -+ int i, cpu = smp_processor_id(); -+ struct sched_domain *sd; -+ -+ if (!idle_cpu(cpu) && housekeeping_cpu(cpu, HK_FLAG_TIMER)) -+ return cpu; -+ -+ rcu_read_lock(); -+ for_each_domain(cpu, sd) { -+ for_each_cpu(i, sched_domain_span(sd)) { -+ if (cpu == i) -+ continue; -+ -+ if (!idle_cpu(i) && housekeeping_cpu(i, HK_FLAG_TIMER)) { -+ cpu = i; -+ goto unlock; -+ } -+ } -+ } -+ -+ if (!housekeeping_cpu(cpu, HK_FLAG_TIMER)) -+ cpu = housekeeping_any_cpu(HK_FLAG_TIMER); -+unlock: -+ rcu_read_unlock(); -+ return cpu; -+} -+ -+/* -+ * When add_timer_on() enqueues a timer into the timer wheel of an -+ * idle CPU then this timer might expire before the next timer event -+ * which is scheduled to wake up that CPU. In case of a completely -+ * idle system the next event might even be infinite time into the -+ * future. wake_up_idle_cpu() ensures that the CPU is woken up and -+ * leaves the inner idle loop so the newly added timer is taken into -+ * account when the CPU goes back to idle and evaluates the timer -+ * wheel for the next timer event. -+ */ -+void wake_up_idle_cpu(int cpu) -+{ -+ if (cpu == smp_processor_id()) -+ return; -+ -+ if (set_nr_and_not_polling(cpu_rq(cpu)->idle)) -+ smp_sched_reschedule(cpu); -+ else -+ trace_sched_wake_idle_without_ipi(cpu); -+} -+ -+static bool wake_up_full_nohz_cpu(int cpu) -+{ -+ /* -+ * We just need the target to call irq_exit() and re-evaluate -+ * the next tick. The nohz full kick at least implies that. -+ * If needed we can still optimize that later with an -+ * empty IRQ. -+ */ -+ if (cpu_is_offline(cpu)) -+ return true; /* Don't try to wake offline CPUs.
*/ -+ if (tick_nohz_full_cpu(cpu)) { -+ if (cpu != smp_processor_id() || -+ tick_nohz_tick_stopped()) -+ tick_nohz_full_kick_cpu(cpu); -+ return true; -+ } -+ -+ return false; -+} -+ -+/* -+ * Wake up the specified CPU. If the CPU is going offline, it is the -+ * caller's responsibility to deal with the lost wakeup, for example, -+ * by hooking into the CPU_DEAD notifier like timers and hrtimers do. -+ */ -+void wake_up_nohz_cpu(int cpu) -+{ -+ if (!wake_up_full_nohz_cpu(cpu)) -+ wake_up_idle_cpu(cpu); -+} -+#endif /* CONFIG_NO_HZ_COMMON */ -+ -+/* -+ * Change a given task's CPU affinity. Migrate the thread to a -+ * proper CPU and schedule it away if the CPU it's executing on -+ * is removed from the allowed bitmask. -+ * -+ * NOTE: the caller must have a valid reference to the task, the -+ * task must not exit() & deallocate itself prematurely. The -+ * call is not atomic; no spinlocks may be held. -+ */ -+static int __set_cpus_allowed_ptr(struct task_struct *p, -+ const struct cpumask *new_mask, bool check) -+{ -+ const struct cpumask *cpu_valid_mask = cpu_active_mask; -+ bool queued = false, running_wrong = false, kthread; -+ struct cpumask old_mask; -+ unsigned int dest_cpu; -+ struct rq_flags rf; -+ struct rq *rq; -+ int ret = 0; -+ -+ rq = task_rq_lock(p, &rf); -+ update_rq_clock(rq); -+ -+ kthread = !!(p->flags & PF_KTHREAD); -+ if (kthread) { -+ /* -+ * Kernel threads are allowed on online && !active CPUs -+ */ -+ cpu_valid_mask = cpu_online_mask; -+ } -+ -+ /* -+ * Must re-check here, to close a race against __kthread_bind(), -+ * sched_setaffinity() is not guaranteed to observe the flag. -+ */ -+ if (check && (p->flags & PF_NO_SETAFFINITY)) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ cpumask_copy(&old_mask, p->cpus_ptr); -+ if (cpumask_equal(&old_mask, new_mask)) -+ goto out; -+ -+ dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask); -+ if (dest_cpu >= nr_cpu_ids) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ queued = task_queued(p); -+ __do_set_cpus_allowed(p, new_mask); -+ -+ if (kthread) { -+ /* -+ * For kernel threads that do indeed end up on online && -+ * !active we want to ensure they are strict per-CPU threads. -+ */ -+ WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) && -+ !cpumask_intersects(new_mask, cpu_active_mask) && -+ p->nr_cpus_allowed != 1); -+ } -+ -+ /* Can the task run on the task's current CPU? If so, we're done */ -+ if (cpumask_test_cpu(task_cpu(p), new_mask)) -+ goto out; -+ -+ if (task_running(rq, p)) { -+ /* Task is running on the wrong cpu now, reschedule it. */ -+ if (rq == this_rq()) { -+ set_task_cpu(p, dest_cpu); -+ set_tsk_need_resched(p); -+ running_wrong = true; -+ } else -+ resched_task(p); -+ } else { -+ if (queued) { -+ /* -+ * Switch runqueue locks after dequeueing the task -+ * here while still holding the pi_lock to be holding -+ * the correct lock for enqueueing. 
-+ */ -+ dequeue_task(rq, p, 0); -+ rq_unlock(rq); -+ -+ rq = cpu_rq(dest_cpu); -+ rq_lock(rq); -+ } -+ set_task_cpu(p, dest_cpu); -+ if (queued) -+ enqueue_task(rq, p, 0); -+ } -+ if (queued) -+ try_preempt(p, rq); -+ if (running_wrong) -+ preempt_disable(); -+out: -+ task_rq_unlock(rq, p, &rf); -+ -+ if (running_wrong) { -+ __schedule(true); -+ preempt_enable(); -+ } -+ -+ return ret; -+} -+ -+int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ return __set_cpus_allowed_ptr(p, new_mask, false); -+} -+EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); -+ -+#ifdef CONFIG_HOTPLUG_CPU -+/* -+ * Run through task list and find tasks affined to the dead cpu, then remove -+ * that cpu from the list, enable cpu0 and set the zerobound flag. Must hold -+ * cpu 0 and src_cpu's runqueue locks. We should be holding both rq lock and -+ * pi_lock to change cpus_mask but it's not going to matter here. -+ */ -+static void bind_zero(int src_cpu) -+{ -+ struct task_struct *p, *t; -+ struct rq *rq0; -+ int bound = 0; -+ -+ if (src_cpu == 0) -+ return; -+ -+ rq0 = cpu_rq(0); -+ -+ do_each_thread(t, p) { -+ if (cpumask_test_cpu(src_cpu, p->cpus_ptr)) { -+ bool local = (task_cpu(p) == src_cpu); -+ struct rq *rq = task_rq(p); -+ -+ /* task_running is the cpu stopper thread */ -+ if (local && task_running(rq, p)) -+ continue; -+ atomic_clear_cpu(src_cpu, &p->cpus_mask); -+ atomic_set_cpu(0, &p->cpus_mask); -+ p->zerobound = true; -+ bound++; -+ if (local) { -+ bool queued = task_queued(p); -+ -+ if (queued) -+ dequeue_task(rq, p, 0); -+ set_task_cpu(p, 0); -+ if (queued) -+ enqueue_task(rq0, p, 0); -+ } -+ } -+ } while_each_thread(t, p); -+ -+ if (bound) { -+ printk(KERN_INFO "MuQSS removed affinity for %d processes to cpu %d\n", -+ bound, src_cpu); -+ } -+} -+ -+/* Find processes with the zerobound flag and reenable their affinity for the -+ * CPU coming alive. */ -+static void unbind_zero(int src_cpu) -+{ -+ int unbound = 0, zerobound = 0; -+ struct task_struct *p, *t; -+ -+ if (src_cpu == 0) -+ return; -+ -+ do_each_thread(t, p) { -+ if (!p->mm) -+ p->zerobound = false; -+ if (p->zerobound) { -+ unbound++; -+ cpumask_set_cpu(src_cpu, &p->cpus_mask); -+ /* Once every CPU affinity has been re-enabled, remove -+ * the zerobound flag */ -+ if (cpumask_subset(cpu_possible_mask, p->cpus_ptr)) { -+ p->zerobound = false; -+ zerobound++; -+ } -+ } -+ } while_each_thread(t, p); -+ -+ if (unbound) { -+ printk(KERN_INFO "MuQSS added affinity for %d processes to cpu %d\n", -+ unbound, src_cpu); -+ } -+ if (zerobound) { -+ printk(KERN_INFO "MuQSS released forced binding to cpu0 for %d processes\n", -+ zerobound); -+ } -+} -+ -+/* -+ * Ensure that the idle task is using init_mm right before its cpu goes -+ * offline. -+ */ -+void idle_task_exit(void) -+{ -+ struct mm_struct *mm = current->active_mm; -+ -+ BUG_ON(cpu_online(smp_processor_id())); -+ -+ if (mm != &init_mm) { -+ switch_mm(mm, &init_mm, current); -+ current->active_mm = &init_mm; -+ finish_arch_post_lock_switch(); -+ } -+ mmdrop(mm); -+} -+#else /* CONFIG_HOTPLUG_CPU */ -+static void unbind_zero(int src_cpu) {} -+#endif /* CONFIG_HOTPLUG_CPU */ -+ -+void sched_set_stop_task(int cpu, struct task_struct *stop) -+{ -+ struct sched_param stop_param = { .sched_priority = STOP_PRIO }; -+ struct sched_param start_param = { .sched_priority = 0 }; -+ struct task_struct *old_stop = cpu_rq(cpu)->stop; -+ -+ if (stop) { -+ /* -+ * Make it appear like a SCHED_FIFO task, its something -+ * userspace knows about and won't get confused about. 
-+ * -+ * Also, it will make PI more or less work without too -+ * much confusion -- but then, stop work should not -+ * rely on PI working anyway. -+ */ -+ sched_setscheduler_nocheck(stop, SCHED_FIFO, &stop_param); -+ } -+ -+ cpu_rq(cpu)->stop = stop; -+ -+ if (old_stop) { -+ /* -+ * Reset it back to a normal scheduling policy so that -+ * it can die in pieces. -+ */ -+ sched_setscheduler_nocheck(old_stop, SCHED_NORMAL, &start_param); -+ } -+} -+ -+#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) -+ -+static struct ctl_table sd_ctl_dir[] = { -+ { -+ .procname = "sched_domain", -+ .mode = 0555, -+ }, -+ {} -+}; -+ -+static struct ctl_table sd_ctl_root[] = { -+ { -+ .procname = "kernel", -+ .mode = 0555, -+ .child = sd_ctl_dir, -+ }, -+ {} -+}; -+ -+static struct ctl_table *sd_alloc_ctl_entry(int n) -+{ -+ struct ctl_table *entry = -+ kcalloc(n, sizeof(struct ctl_table), GFP_KERNEL); -+ -+ return entry; -+} -+ -+static void sd_free_ctl_entry(struct ctl_table **tablep) -+{ -+ struct ctl_table *entry; -+ -+ /* -+ * In the intermediate directories, both the child directory and -+ * procname are dynamically allocated and could fail but the mode -+ * will always be set. In the lowest directory the names are -+ * static strings and all have proc handlers. -+ */ -+ for (entry = *tablep; entry->mode; entry++) { -+ if (entry->child) -+ sd_free_ctl_entry(&entry->child); -+ if (entry->proc_handler == NULL) -+ kfree(entry->procname); -+ } -+ -+ kfree(*tablep); -+ *tablep = NULL; -+} -+ -+static void -+set_table_entry(struct ctl_table *entry, -+ const char *procname, void *data, int maxlen, -+ umode_t mode, proc_handler *proc_handler) -+{ -+ entry->procname = procname; -+ entry->data = data; -+ entry->maxlen = maxlen; -+ entry->mode = mode; -+ entry->proc_handler = proc_handler; -+} -+ -+static struct ctl_table * -+sd_alloc_ctl_domain_table(struct sched_domain *sd) -+{ -+ struct ctl_table *table = sd_alloc_ctl_entry(9); -+ -+ if (table == NULL) -+ return NULL; -+ -+ set_table_entry(&table[0], "min_interval", &sd->min_interval, sizeof(long), 0644, proc_doulongvec_minmax); -+ set_table_entry(&table[1], "max_interval", &sd->max_interval, sizeof(long), 0644, proc_doulongvec_minmax); -+ set_table_entry(&table[2], "busy_factor", &sd->busy_factor, sizeof(int), 0644, proc_dointvec_minmax); -+ set_table_entry(&table[3], "imbalance_pct", &sd->imbalance_pct, sizeof(int), 0644, proc_dointvec_minmax); -+ set_table_entry(&table[4], "cache_nice_tries", &sd->cache_nice_tries, sizeof(int), 0644, proc_dointvec_minmax); -+ set_table_entry(&table[5], "flags", &sd->flags, sizeof(int), 0644, proc_dointvec_minmax); -+ set_table_entry(&table[6], "max_newidle_lb_cost", &sd->max_newidle_lb_cost, sizeof(long), 0644, proc_doulongvec_minmax); -+ set_table_entry(&table[7], "name", sd->name, CORENAME_MAX_SIZE, 0444, proc_dostring); -+ /* &table[8] is terminator */ -+ -+ return table; -+} -+ -+static struct ctl_table *sd_alloc_ctl_cpu_table(int cpu) -+{ -+ struct ctl_table *entry, *table; -+ struct sched_domain *sd; -+ int domain_num = 0, i; -+ char buf[32]; -+ -+ for_each_domain(cpu, sd) -+ domain_num++; -+ entry = table = sd_alloc_ctl_entry(domain_num + 1); -+ if (table == NULL) -+ return NULL; -+ -+ i = 0; -+ for_each_domain(cpu, sd) { -+ snprintf(buf, 32, "domain%d", i); -+ entry->procname = kstrdup(buf, GFP_KERNEL); -+ entry->mode = 0555; -+ entry->child = sd_alloc_ctl_domain_table(sd); -+ entry++; -+ i++; -+ } -+ return table; -+} -+ -+static cpumask_var_t sd_sysctl_cpus; -+static struct ctl_table_header 
*sd_sysctl_header; -+ -+void register_sched_domain_sysctl(void) -+{ -+ static struct ctl_table *cpu_entries; -+ static struct ctl_table **cpu_idx; -+ char buf[32]; -+ int i; -+ -+ if (!cpu_entries) { -+ cpu_entries = sd_alloc_ctl_entry(num_possible_cpus() + 1); -+ if (!cpu_entries) -+ return; -+ -+ WARN_ON(sd_ctl_dir[0].child); -+ sd_ctl_dir[0].child = cpu_entries; -+ } -+ -+ if (!cpu_idx) { -+ struct ctl_table *e = cpu_entries; -+ -+ cpu_idx = kcalloc(nr_cpu_ids, sizeof(struct ctl_table*), GFP_KERNEL); -+ if (!cpu_idx) -+ return; -+ -+ /* deal with sparse possible map */ -+ for_each_possible_cpu(i) { -+ cpu_idx[i] = e; -+ e++; -+ } -+ } -+ -+ if (!cpumask_available(sd_sysctl_cpus)) { -+ if (!alloc_cpumask_var(&sd_sysctl_cpus, GFP_KERNEL)) -+ return; -+ -+ /* init to possible to not have holes in @cpu_entries */ -+ cpumask_copy(sd_sysctl_cpus, cpu_possible_mask); -+ } -+ -+ for_each_cpu(i, sd_sysctl_cpus) { -+ struct ctl_table *e = cpu_idx[i]; -+ -+ if (e->child) -+ sd_free_ctl_entry(&e->child); -+ -+ if (!e->procname) { -+ snprintf(buf, 32, "cpu%d", i); -+ e->procname = kstrdup(buf, GFP_KERNEL); -+ } -+ e->mode = 0555; -+ e->child = sd_alloc_ctl_cpu_table(i); -+ -+ __cpumask_clear_cpu(i, sd_sysctl_cpus); -+ } -+ -+ WARN_ON(sd_sysctl_header); -+ sd_sysctl_header = register_sysctl_table(sd_ctl_root); -+} -+ -+void dirty_sched_domain_sysctl(int cpu) -+{ -+ if (cpumask_available(sd_sysctl_cpus)) -+ __cpumask_set_cpu(cpu, sd_sysctl_cpus); -+} -+ -+/* may be called multiple times per register */ -+void unregister_sched_domain_sysctl(void) -+{ -+ unregister_sysctl_table(sd_sysctl_header); -+ sd_sysctl_header = NULL; -+} -+#endif /* CONFIG_SYSCTL */ -+ -+void set_rq_online(struct rq *rq) -+{ -+ if (!rq->online) { -+ cpumask_set_cpu(cpu_of(rq), rq->rd->online); -+ rq->online = true; -+ } -+} -+ -+void set_rq_offline(struct rq *rq) -+{ -+ if (rq->online) { -+ int cpu = cpu_of(rq); -+ -+ cpumask_clear_cpu(cpu, rq->rd->online); -+ rq->online = false; -+ clear_cpuidle_map(cpu); -+ } -+} -+ -+/* -+ * used to mark begin/end of suspend/resume: -+ */ -+static int num_cpus_frozen; -+ -+/* -+ * Update cpusets according to cpu_active mask. If cpusets are -+ * disabled, cpuset_update_active_cpus() becomes a simple wrapper -+ * around partition_sched_domains(). -+ * -+ * If we come here as part of a suspend/resume, don't touch cpusets because we -+ * want to restore it back to its original state upon resume anyway. -+ */ -+static void cpuset_cpu_active(void) -+{ -+ if (cpuhp_tasks_frozen) { -+ /* -+ * num_cpus_frozen tracks how many CPUs are involved in suspend -+ * resume sequence. As long as this is not the last online -+ * operation in the resume sequence, just build a single sched -+ * domain, ignoring cpusets. -+ */ -+ partition_sched_domains(1, NULL, NULL); -+ if (--num_cpus_frozen) -+ return; -+ /* -+ * This is the last CPU online operation. So fall through and -+ * restore the original sched domains by considering the -+ * cpuset configurations. -+ */ -+ cpuset_force_rebuild(); -+ } -+ -+ cpuset_update_active_cpus(); -+} -+ -+static int cpuset_cpu_inactive(unsigned int cpu) -+{ -+ if (!cpuhp_tasks_frozen) { -+ cpuset_update_active_cpus(); -+ } else { -+ num_cpus_frozen++; -+ partition_sched_domains(1, NULL, NULL); -+ } -+ return 0; -+} -+ -+int sched_cpu_activate(unsigned int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ struct rq_flags rf; -+ -+#ifdef CONFIG_SCHED_SMT -+ /* -+ * When going up, increment the number of cores with SMT present. 
-+ */ -+ if (cpumask_weight(cpu_smt_mask(cpu)) == 2) -+ static_branch_inc_cpuslocked(&sched_smt_present); -+#endif -+ set_cpu_active(cpu, true); -+ -+ if (sched_smp_initialized) { -+ sched_domains_numa_masks_set(cpu); -+ cpuset_cpu_active(); -+ } -+ -+ /* -+ * Put the rq online, if not already. This happens: -+ * -+ * 1) In the early boot process, because we build the real domains -+ * after all CPUs have been brought up. -+ * -+ * 2) At runtime, if cpuset_cpu_active() fails to rebuild the -+ * domains. -+ */ -+ rq_lock_irqsave(rq, &rf); -+ if (rq->rd) { -+ BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); -+ set_rq_online(rq); -+ } -+ unbind_zero(cpu); -+ rq_unlock_irqrestore(rq, &rf); -+ -+ return 0; -+} -+ -+int sched_cpu_deactivate(unsigned int cpu) -+{ -+ int ret; -+ -+ set_cpu_active(cpu, false); -+ /* -+ * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU -+ * users of this state to go away such that all new such users will -+ * observe it. -+ * -+ * Do sync before park smpboot threads to take care the rcu boost case. -+ */ -+ synchronize_rcu(); -+ -+#ifdef CONFIG_SCHED_SMT -+ /* -+ * When going down, decrement the number of cores with SMT present. -+ */ -+ if (cpumask_weight(cpu_smt_mask(cpu)) == 2) -+ static_branch_dec_cpuslocked(&sched_smt_present); -+#endif -+ -+ if (!sched_smp_initialized) -+ return 0; -+ -+ ret = cpuset_cpu_inactive(cpu); -+ if (ret) { -+ set_cpu_active(cpu, true); -+ return ret; -+ } -+ sched_domains_numa_masks_clear(cpu); -+ return 0; -+} -+ -+int sched_cpu_starting(unsigned int cpu) -+{ -+ sched_tick_start(cpu); -+ return 0; -+} -+ -+#ifdef CONFIG_HOTPLUG_CPU -+int sched_cpu_dying(unsigned int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ /* Handle pending wakeups and then migrate everything off */ -+ sched_ttwu_pending(); -+ sched_tick_stop(cpu); -+ -+ local_irq_save(flags); -+ double_rq_lock(rq, cpu_rq(0)); -+ if (rq->rd) { -+ BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); -+ set_rq_offline(rq); -+ } -+ bind_zero(cpu); -+ double_rq_unlock(rq, cpu_rq(0)); -+ sched_start_tick(rq, cpu); -+ hrexpiry_clear(rq); -+ local_irq_restore(flags); -+ -+ return 0; -+} -+#endif -+ -+#if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_MC) -+/* -+ * Cheaper version of the below functions in case support for SMT and MC is -+ * compiled in but CPUs have no siblings. 
-+ */ -+static bool sole_cpu_idle(struct rq *rq) -+{ -+ return rq_idle(rq); -+} -+#endif -+#ifdef CONFIG_SCHED_SMT -+static const cpumask_t *thread_cpumask(int cpu) -+{ -+ return topology_sibling_cpumask(cpu); -+} -+/* All this CPU's SMT siblings are idle */ -+static bool siblings_cpu_idle(struct rq *rq) -+{ -+ return cpumask_subset(&rq->thread_mask, &cpu_idle_map); -+} -+#endif -+#ifdef CONFIG_SCHED_MC -+static const cpumask_t *core_cpumask(int cpu) -+{ -+ return topology_core_cpumask(cpu); -+} -+/* All this CPU's shared cache siblings are idle */ -+static bool cache_cpu_idle(struct rq *rq) -+{ -+ return cpumask_subset(&rq->core_mask, &cpu_idle_map); -+} -+/* MC siblings CPU mask which share the same LLC */ -+static const cpumask_t *llc_core_cpumask(int cpu) -+{ -+ return per_cpu(cpu_llc_shared_map, cpu); -+} -+#endif -+ -+enum sched_domain_level { -+ SD_LV_NONE = 0, -+ SD_LV_SIBLING, -+ SD_LV_MC, -+ SD_LV_BOOK, -+ SD_LV_CPU, -+ SD_LV_NODE, -+ SD_LV_ALLNODES, -+ SD_LV_MAX -+}; -+ -+void __init sched_init_smp(void) -+{ -+ struct rq *rq, *other_rq, *leader = cpu_rq(0); -+ struct sched_domain *sd; -+ int cpu, other_cpu, i; -+#ifdef CONFIG_SCHED_SMT -+ bool smt_threads = false; -+#endif -+ sched_init_numa(); -+ -+ /* -+ * There's no userspace yet to cause hotplug operations; hence all the -+ * cpu masks are stable and all blatant races in the below code cannot -+ * happen. -+ */ -+ mutex_lock(&sched_domains_mutex); -+ sched_init_domains(cpu_active_mask); -+ mutex_unlock(&sched_domains_mutex); -+ -+ /* Move init over to a non-isolated CPU */ -+ if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0) -+ BUG(); -+ -+ local_irq_disable(); -+ mutex_lock(&sched_domains_mutex); -+ lock_all_rqs(); -+ -+ printk(KERN_INFO "MuQSS possible/present/online CPUs: %d/%d/%d\n", -+ num_possible_cpus(), num_present_cpus(), num_online_cpus()); -+ -+ /* -+ * Set up the relative cache distance of each online cpu from each -+ * other in a simple array for quick lookup. Locality is determined -+ * by the closest sched_domain that CPUs are separated by. CPUs with -+ * shared cache in SMT and MC are treated as local. Separate CPUs -+ * (within the same package or physically) within the same node are -+ * treated as not local. CPUs not even in the same domain (different -+ * nodes) are treated as very distant. -+ */ -+ for (cpu = num_online_cpus() - 1; cpu >= 0; cpu--) { -+ rq = cpu_rq(cpu); -+ leader = NULL; -+ /* First check if this cpu is in the same node */ -+ for_each_domain(cpu, sd) { -+ if (sd->level > SD_LV_MC) -+ continue; -+ if (rqshare != RQSHARE_ALL) -+ leader = NULL; -+ /* Set locality to local node if not already found lower */ -+ for_each_cpu(other_cpu, sched_domain_span(sd)) { -+ if (rqshare >= RQSHARE_SMP) { -+ other_rq = cpu_rq(other_cpu); -+ -+ /* Set the smp_leader to the first CPU */ -+ if (!leader) -+ leader = rq; -+ other_rq->smp_leader = leader; -+ } -+ if (rq->cpu_locality[other_cpu] > LOCALITY_SMP) -+ rq->cpu_locality[other_cpu] = LOCALITY_SMP; -+ } -+ } -+ -+ /* -+ * Each runqueue has its own function in case it doesn't have -+ * siblings of its own allowing mixed topologies. 
-+ */ -+#ifdef CONFIG_SCHED_MC -+ leader = NULL; -+ if (cpumask_weight(core_cpumask(cpu)) > 1) { -+ cpumask_copy(&rq->core_mask, llc_core_cpumask(cpu)); -+ cpumask_clear_cpu(cpu, &rq->core_mask); -+ for_each_cpu(other_cpu, core_cpumask(cpu)) { -+ if (rqshare == RQSHARE_MC || -+ (rqshare == RQSHARE_MC_LLC && cpumask_test_cpu(other_cpu, llc_core_cpumask(cpu)))) { -+ other_rq = cpu_rq(other_cpu); -+ -+ /* Set the mc_leader to the first CPU */ -+ if (!leader) -+ leader = rq; -+ other_rq->mc_leader = leader; -+ } -+ if (rq->cpu_locality[other_cpu] > LOCALITY_MC) { -+ /* this is to get LLC into play even in case LLC sharing is not used */ -+ if (cpumask_test_cpu(other_cpu, llc_core_cpumask(cpu))) -+ rq->cpu_locality[other_cpu] = LOCALITY_MC_LLC; -+ else -+ rq->cpu_locality[other_cpu] = LOCALITY_MC; -+ } -+ } -+ rq->cache_idle = cache_cpu_idle; -+ } -+#endif -+#ifdef CONFIG_SCHED_SMT -+ leader = NULL; -+ if (cpumask_weight(thread_cpumask(cpu)) > 1) { -+ cpumask_copy(&rq->thread_mask, thread_cpumask(cpu)); -+ cpumask_clear_cpu(cpu, &rq->thread_mask); -+ for_each_cpu(other_cpu, thread_cpumask(cpu)) { -+ if (rqshare == RQSHARE_SMT) { -+ other_rq = cpu_rq(other_cpu); -+ -+ /* Set the smt_leader to the first CPU */ -+ if (!leader) -+ leader = rq; -+ other_rq->smt_leader = leader; -+ } -+ if (rq->cpu_locality[other_cpu] > LOCALITY_SMT) -+ rq->cpu_locality[other_cpu] = LOCALITY_SMT; -+ } -+ rq->siblings_idle = siblings_cpu_idle; -+ smt_threads = true; -+ } -+#endif -+ } -+ -+#ifdef CONFIG_SMT_NICE -+ if (smt_threads) { -+ check_siblings = &check_smt_siblings; -+ wake_siblings = &wake_smt_siblings; -+ smt_schedule = &smt_should_schedule; -+ } -+#endif -+ unlock_all_rqs(); -+ mutex_unlock(&sched_domains_mutex); -+ -+ for_each_online_cpu(cpu) { -+ rq = cpu_rq(cpu); -+ for_each_online_cpu(other_cpu) { -+ printk(KERN_DEBUG "MuQSS locality CPU %d to %d: %d\n", cpu, other_cpu, rq->cpu_locality[other_cpu]); -+ } -+ } -+ -+ for_each_online_cpu(cpu) { -+ rq = cpu_rq(cpu); -+ leader = rq->smp_leader; -+ -+ rq_lock(rq); -+ if (leader && rq != leader) { -+ printk(KERN_INFO "MuQSS sharing SMP runqueue from CPU %d to CPU %d\n", -+ leader->cpu, rq->cpu); -+ kfree(rq->node); -+ kfree(rq->sl); -+ kfree(rq->lock); -+ rq->node = leader->node; -+ rq->sl = leader->sl; -+ rq->lock = leader->lock; -+ barrier(); -+ /* To make up for not unlocking the freed runlock */ -+ preempt_enable(); -+ } else -+ rq_unlock(rq); -+ } -+ -+#ifdef CONFIG_SCHED_MC -+ for_each_online_cpu(cpu) { -+ rq = cpu_rq(cpu); -+ leader = rq->mc_leader; -+ -+ rq_lock(rq); -+ if (leader && rq != leader) { -+ printk(KERN_INFO "MuQSS sharing MC runqueue from CPU %d to CPU %d\n", -+ leader->cpu, rq->cpu); -+ kfree(rq->node); -+ kfree(rq->sl); -+ kfree(rq->lock); -+ rq->node = leader->node; -+ rq->sl = leader->sl; -+ rq->lock = leader->lock; -+ barrier(); -+ /* To make up for not unlocking the freed runlock */ -+ preempt_enable(); -+ } else -+ rq_unlock(rq); -+ } -+#endif /* CONFIG_SCHED_MC */ -+ -+#ifdef CONFIG_SCHED_SMT -+ for_each_online_cpu(cpu) { -+ rq = cpu_rq(cpu); -+ -+ leader = rq->smt_leader; -+ -+ rq_lock(rq); -+ if (leader && rq != leader) { -+ printk(KERN_INFO "MuQSS sharing SMT runqueue from CPU %d to CPU %d\n", -+ leader->cpu, rq->cpu); -+ kfree(rq->node); -+ kfree(rq->sl); -+ kfree(rq->lock); -+ rq->node = leader->node; -+ rq->sl = leader->sl; -+ rq->lock = leader->lock; -+ barrier(); -+ /* To make up for not unlocking the freed runlock */ -+ preempt_enable(); -+ } else -+ rq_unlock(rq); -+ } -+#endif /* CONFIG_SCHED_SMT */ -+ -+ 
local_irq_enable(); -+ -+ total_runqueues = 0; -+ for_each_online_cpu(cpu) { -+ int locality, total_rqs = 0, total_cpus = 0; -+ -+ rq = cpu_rq(cpu); -+ if ( -+#ifdef CONFIG_SCHED_MC -+ (rq->mc_leader == rq) && -+#endif -+#ifdef CONFIG_SCHED_SMT -+ (rq->smt_leader == rq) && -+#endif -+ (rq->smp_leader == rq)) { -+ total_runqueues++; -+ } -+ -+ for (locality = LOCALITY_SAME; locality <= LOCALITY_DISTANT; locality++) { -+ int selected_cpus[NR_CPUS], selected_cpu_cnt, selected_cpu_idx, test_cpu_idx, cpu_idx, best_locality, test_cpu; -+ int ordered_cpus[NR_CPUS], ordered_cpus_idx; -+ -+ ordered_cpus_idx = -1; -+ selected_cpu_cnt = 0; -+ -+ for_each_online_cpu(test_cpu) { -+ if (cpu < num_online_cpus() / 2) -+ other_cpu = cpu + test_cpu; -+ else -+ other_cpu = cpu - test_cpu; -+ if (other_cpu < 0) -+ other_cpu += num_online_cpus(); -+ else -+ other_cpu %= num_online_cpus(); -+ /* gather CPUs of the same locality */ -+ if (rq->cpu_locality[other_cpu] == locality) { -+ selected_cpus[selected_cpu_cnt] = other_cpu; -+ selected_cpu_cnt++; -+ } -+ } -+ -+ /* reserve first CPU as starting point */ -+ if (selected_cpu_cnt > 0) { -+ ordered_cpus_idx++; -+ ordered_cpus[ordered_cpus_idx] = selected_cpus[ordered_cpus_idx]; -+ selected_cpus[ordered_cpus_idx] = -1; -+ } -+ -+ /* take each CPU and sort it within the same locality based on each inter-CPU localities */ -+ for(test_cpu_idx = 1; test_cpu_idx < selected_cpu_cnt; test_cpu_idx++) { -+ /* starting point with worst locality and current CPU */ -+ best_locality = LOCALITY_DISTANT; -+ selected_cpu_idx = test_cpu_idx; -+ -+ /* try to find the best locality within group */ -+ for(cpu_idx = 1; cpu_idx < selected_cpu_cnt; cpu_idx++) { -+ /* if CPU has not been used and locality is better */ -+ if (selected_cpus[cpu_idx] > -1) { -+ other_rq = cpu_rq(ordered_cpus[ordered_cpus_idx]); -+ if (best_locality > other_rq->cpu_locality[selected_cpus[cpu_idx]]) { -+ /* assign best locality and best CPU idx in array */ -+ best_locality = other_rq->cpu_locality[selected_cpus[cpu_idx]]; -+ selected_cpu_idx = cpu_idx; -+ } -+ } -+ } -+ -+ /* add our next best CPU to ordered list */ -+ ordered_cpus_idx++; -+ ordered_cpus[ordered_cpus_idx] = selected_cpus[selected_cpu_idx]; -+ /* mark this CPU as used */ -+ selected_cpus[selected_cpu_idx] = -1; -+ } -+ -+ /* set up RQ and CPU orders */ -+ for (test_cpu = 0; test_cpu <= ordered_cpus_idx; test_cpu++) { -+ other_rq = cpu_rq(ordered_cpus[test_cpu]); -+ /* set up cpu orders */ -+ rq->cpu_order[total_cpus++] = other_rq; -+ if ( -+#ifdef CONFIG_SCHED_MC -+ (other_rq->mc_leader == other_rq) && -+#endif -+#ifdef CONFIG_SCHED_SMT -+ (other_rq->smt_leader == other_rq) && -+#endif -+ (other_rq->smp_leader == other_rq)) { -+ /* set up RQ orders */ -+ rq->rq_order[total_rqs++] = other_rq; -+ } -+ } -+ } -+ } -+ -+ for_each_online_cpu(cpu) { -+ rq = cpu_rq(cpu); -+ for (i = 0; i < total_runqueues; i++) { -+ printk(KERN_DEBUG "MuQSS CPU %d llc %d RQ order %d RQ %d llc %d\n", cpu, per_cpu(cpu_llc_id, cpu), i, -+ rq->rq_order[i]->cpu, per_cpu(cpu_llc_id, rq->rq_order[i]->cpu)); -+ } -+ } -+ -+ for_each_online_cpu(cpu) { -+ rq = cpu_rq(cpu); -+ for (i = 0; i < num_online_cpus(); i++) { -+ printk(KERN_DEBUG "MuQSS CPU %d llc %d CPU order %d RQ %d llc %d\n", cpu, per_cpu(cpu_llc_id, cpu), i, -+ rq->cpu_order[i]->cpu, per_cpu(cpu_llc_id, rq->cpu_order[i]->cpu)); -+ } -+ } -+ -+ switch (rqshare) { -+ case RQSHARE_ALL: -+ /* This should only ever read 1 */ -+ printk(KERN_INFO "MuQSS runqueue share type ALL total runqueues: %d\n", -+ 
total_runqueues); -+ break; -+ case RQSHARE_SMP: -+ printk(KERN_INFO "MuQSS runqueue share type SMP total runqueues: %d\n", -+ total_runqueues); -+ break; -+ case RQSHARE_MC: -+ printk(KERN_INFO "MuQSS runqueue share type MC total runqueues: %d\n", -+ total_runqueues); -+ break; -+ case RQSHARE_MC_LLC: -+ printk(KERN_INFO "MuQSS runqueue share type LLC total runqueues: %d\n", -+ total_runqueues); -+ break; -+ case RQSHARE_SMT: -+ printk(KERN_INFO "MuQSS runqueue share type SMT total runqueues: %d\n", -+ total_runqueues); -+ break; -+ case RQSHARE_NONE: -+ printk(KERN_INFO "MuQSS runqueue share type NONE total runqueues: %d\n", -+ total_runqueues); -+ break; -+ } -+ -+ sched_smp_initialized = true; -+} -+#else -+void __init sched_init_smp(void) -+{ -+ sched_smp_initialized = true; -+} -+#endif /* CONFIG_SMP */ -+ -+int in_sched_functions(unsigned long addr) -+{ -+ return in_lock_functions(addr) || -+ (addr >= (unsigned long)__sched_text_start -+ && addr < (unsigned long)__sched_text_end); -+} -+ -+#ifdef CONFIG_CGROUP_SCHED -+/* task group related information */ -+struct task_group { -+ struct cgroup_subsys_state css; -+ -+ struct rcu_head rcu; -+ struct list_head list; -+ -+ struct task_group *parent; -+ struct list_head siblings; -+ struct list_head children; -+}; -+ -+/* -+ * Default task group. -+ * Every task in system belongs to this group at bootup. -+ */ -+struct task_group root_task_group; -+LIST_HEAD(task_groups); -+ -+/* Cacheline aligned slab cache for task_group */ -+static struct kmem_cache *task_group_cache __read_mostly; -+#endif /* CONFIG_CGROUP_SCHED */ -+ -+void __init sched_init(void) -+{ -+#ifdef CONFIG_SMP -+ int cpu_ids; -+#endif -+ int i; -+ struct rq *rq; -+ -+ wait_bit_init(); -+ -+ prio_ratios[0] = 128; -+ for (i = 1 ; i < NICE_WIDTH ; i++) -+ prio_ratios[i] = prio_ratios[i - 1] * 11 / 10; -+ -+ skiplist_node_init(&init_task.node); -+ -+#ifdef CONFIG_SMP -+ init_defrootdomain(); -+ cpumask_clear(&cpu_idle_map); -+#else -+ uprq = &per_cpu(runqueues, 0); -+#endif -+ -+#ifdef CONFIG_CGROUP_SCHED -+ task_group_cache = KMEM_CACHE(task_group, 0); -+ -+ list_add(&root_task_group.list, &task_groups); -+ INIT_LIST_HEAD(&root_task_group.children); -+ INIT_LIST_HEAD(&root_task_group.siblings); -+#endif /* CONFIG_CGROUP_SCHED */ -+ for_each_possible_cpu(i) { -+ rq = cpu_rq(i); -+ rq->node = kmalloc(sizeof(skiplist_node), GFP_ATOMIC); -+ skiplist_init(rq->node); -+ rq->sl = new_skiplist(rq->node); -+ rq->lock = kmalloc(sizeof(raw_spinlock_t), GFP_ATOMIC); -+ raw_spin_lock_init(rq->lock); -+ rq->nr_running = 0; -+ rq->nr_uninterruptible = 0; -+ rq->nr_switches = 0; -+ rq->clock = rq->old_clock = rq->last_niffy = rq->niffies = 0; -+ rq->last_jiffy = jiffies; -+ rq->user_ns = rq->nice_ns = rq->softirq_ns = rq->system_ns = -+ rq->iowait_ns = rq->idle_ns = 0; -+ rq->dither = 0; -+ set_rq_task(rq, &init_task); -+ rq->iso_ticks = 0; -+ rq->iso_refractory = false; -+#ifdef CONFIG_SMP -+ rq->smp_leader = rq; -+#ifdef CONFIG_SCHED_MC -+ rq->mc_leader = rq; -+#endif -+#ifdef CONFIG_SCHED_SMT -+ rq->smt_leader = rq; -+#endif -+ rq->sd = NULL; -+ rq->rd = NULL; -+ rq->online = false; -+ rq->cpu = i; -+ rq_attach_root(rq, &def_root_domain); -+#endif -+ init_rq_hrexpiry(rq); -+ atomic_set(&rq->nr_iowait, 0); -+ } -+ -+#ifdef CONFIG_SMP -+ cpu_ids = i; -+ /* -+ * Set the base locality for cpu cache distance calculation to -+ * "distant" (3). Make sure the distance from a CPU to itself is 0. 
-+ */ -+ for_each_possible_cpu(i) { -+ int j; -+ -+ rq = cpu_rq(i); -+#ifdef CONFIG_SCHED_SMT -+ rq->siblings_idle = sole_cpu_idle; -+#endif -+#ifdef CONFIG_SCHED_MC -+ rq->cache_idle = sole_cpu_idle; -+#endif -+ rq->cpu_locality = kmalloc(cpu_ids * sizeof(int *), GFP_ATOMIC); -+ for_each_possible_cpu(j) { -+ if (i == j) -+ rq->cpu_locality[j] = LOCALITY_SAME; -+ else -+ rq->cpu_locality[j] = LOCALITY_DISTANT; -+ } -+ rq->rq_order = kmalloc(cpu_ids * sizeof(struct rq *), GFP_ATOMIC); -+ rq->cpu_order = kmalloc(cpu_ids * sizeof(struct rq *), GFP_ATOMIC); -+ rq->rq_order[0] = rq->cpu_order[0] = rq; -+ for (j = 1; j < cpu_ids; j++) -+ rq->rq_order[j] = rq->cpu_order[j] = cpu_rq(j); -+ } -+#endif -+ -+ /* -+ * The boot idle thread does lazy MMU switching as well: -+ */ -+ mmgrab(&init_mm); -+ enter_lazy_tlb(&init_mm, current); -+ -+ /* -+ * Make us the idle thread. Technically, schedule() should not be -+ * called from this thread, however somewhere below it might be, -+ * but because we are the idle thread, we just pick up running again -+ * when this runqueue becomes "idle". -+ */ -+ init_idle(current, smp_processor_id()); -+ -+#ifdef CONFIG_SMP -+ idle_thread_set_boot_cpu(); -+#endif /* SMP */ -+ -+ init_schedstats(); -+ -+ psi_init(); -+} -+ -+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP -+static inline int preempt_count_equals(int preempt_offset) -+{ -+ int nested = preempt_count() + rcu_preempt_depth(); -+ -+ return (nested == preempt_offset); -+} -+ -+void __might_sleep(const char *file, int line, int preempt_offset) -+{ -+ /* -+ * Blocking primitives will set (and therefore destroy) current->state, -+ * since we will exit with TASK_RUNNING make sure we enter with it, -+ * otherwise we will destroy state. -+ */ -+ WARN_ONCE(current->state != TASK_RUNNING && current->task_state_change, -+ "do not call blocking ops when !TASK_RUNNING; " -+ "state=%lx set at [<%p>] %pS\n", -+ current->state, -+ (void *)current->task_state_change, -+ (void *)current->task_state_change); -+ -+ ___might_sleep(file, line, preempt_offset); -+} -+EXPORT_SYMBOL(__might_sleep); -+ -+void __cant_sleep(const char *file, int line, int preempt_offset) -+{ -+ static unsigned long prev_jiffy; -+ -+ if (irqs_disabled()) -+ return; -+ -+ if (!IS_ENABLED(CONFIG_PREEMPT_COUNT)) -+ return; -+ -+ if (preempt_count() > preempt_offset) -+ return; -+ -+ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) -+ return; -+ prev_jiffy = jiffies; -+ -+ printk(KERN_ERR "BUG: assuming atomic context at %s:%d\n", file, line); -+ printk(KERN_ERR "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n", -+ in_atomic(), irqs_disabled(), -+ current->pid, current->comm); -+ -+ debug_show_held_locks(current); -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+} -+EXPORT_SYMBOL_GPL(__cant_sleep); -+ -+void ___might_sleep(const char *file, int line, int preempt_offset) -+{ -+ /* Ratelimiting timestamp: */ -+ static unsigned long prev_jiffy; -+ -+ unsigned long preempt_disable_ip; -+ -+ /* WARN_ON_ONCE() by default, no rate limit required: */ -+ rcu_sleep_check(); -+ -+ if ((preempt_count_equals(preempt_offset) && !irqs_disabled() && -+ !is_idle_task(current) && !current->non_block_count) || -+ system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING || -+ oops_in_progress) -+ return; -+ -+ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) -+ return; -+ prev_jiffy = jiffies; -+ -+ /* Save this before calling printk(), since that will clobber it: */ -+ preempt_disable_ip = get_preempt_disable_ip(current); -+ -+ printk(KERN_ERR 
-+ "BUG: sleeping function called from invalid context at %s:%d\n", -+ file, line); -+ printk(KERN_ERR -+ "in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n", -+ in_atomic(), irqs_disabled(), current->non_block_count, -+ current->pid, current->comm); -+ -+ if (task_stack_end_corrupted(current)) -+ printk(KERN_EMERG "Thread overran stack, or stack corrupted\n"); -+ -+ debug_show_held_locks(current); -+ if (irqs_disabled()) -+ print_irqtrace_events(current); -+ if (IS_ENABLED(CONFIG_DEBUG_PREEMPT) -+ && !preempt_count_equals(preempt_offset)) { -+ pr_err("Preemption disabled at:"); -+ print_ip_sym(preempt_disable_ip); -+ pr_cont("\n"); -+ } -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+} -+EXPORT_SYMBOL(___might_sleep); -+#endif -+ -+#ifdef CONFIG_MAGIC_SYSRQ -+static inline void normalise_rt_tasks(void) -+{ -+ struct sched_attr attr = {}; -+ struct task_struct *g, *p; -+ struct rq_flags rf; -+ struct rq *rq; -+ -+ read_lock(&tasklist_lock); -+ for_each_process_thread(g, p) { -+ /* -+ * Only normalize user tasks: -+ */ -+ if (p->flags & PF_KTHREAD) -+ continue; -+ -+ if (!rt_task(p) && !iso_task(p)) -+ continue; -+ -+ rq = task_rq_lock(p, &rf); -+ __setscheduler(p, rq, SCHED_NORMAL, 0, &attr, false); -+ task_rq_unlock(rq, p, &rf); -+ } -+ read_unlock(&tasklist_lock); -+} -+ -+void normalize_rt_tasks(void) -+{ -+ normalise_rt_tasks(); -+} -+#endif /* CONFIG_MAGIC_SYSRQ */ -+ -+#if defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) -+/* -+ * These functions are only useful for the IA64 MCA handling, or kdb. -+ * -+ * They can only be called when the whole system has been -+ * stopped - every CPU needs to be quiescent, and no scheduling -+ * activity can take place. Using them for anything else would -+ * be a serious bug, and as a result, they aren't even visible -+ * under any other configuration. -+ */ -+ -+/** -+ * curr_task - return the current task for a given CPU. -+ * @cpu: the processor in question. -+ * -+ * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! -+ * -+ * Return: The current task for @cpu. -+ */ -+struct task_struct *curr_task(int cpu) -+{ -+ return cpu_curr(cpu); -+} -+ -+#endif /* defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) */ -+ -+#ifdef CONFIG_IA64 -+/** -+ * ia64_set_curr_task - set the current task for a given CPU. -+ * @cpu: the processor in question. -+ * @p: the task pointer to set. -+ * -+ * Description: This function must only be used when non-maskable interrupts -+ * are serviced on a separate stack. It allows the architecture to switch the -+ * notion of the current task on a CPU in a non-blocking manner. This function -+ * must be called with all CPU's synchronised, and interrupts disabled, the -+ * and caller must save the original value of the current task (see -+ * curr_task() above) and restore that value before reenabling interrupts and -+ * re-starting the system. -+ * -+ * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! 
-+ */ -+void ia64_set_curr_task(int cpu, struct task_struct *p) -+{ -+ cpu_curr(cpu) = p; -+} -+ -+#endif -+ -+void init_idle_bootup_task(struct task_struct *idle) -+{} -+ -+#ifdef CONFIG_SCHED_DEBUG -+__read_mostly bool sched_debug_enabled; -+ -+void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, -+ struct seq_file *m) -+{ -+ seq_printf(m, "%s (%d, #threads: %d)\n", p->comm, task_pid_nr_ns(p, ns), -+ get_nr_threads(p)); -+} -+ -+void proc_sched_set_task(struct task_struct *p) -+{} -+#endif -+ -+#ifdef CONFIG_CGROUP_SCHED -+static void sched_free_group(struct task_group *tg) -+{ -+ kmem_cache_free(task_group_cache, tg); -+} -+ -+/* allocate runqueue etc for a new task group */ -+struct task_group *sched_create_group(struct task_group *parent) -+{ -+ struct task_group *tg; -+ -+ tg = kmem_cache_alloc(task_group_cache, GFP_KERNEL | __GFP_ZERO); -+ if (!tg) -+ return ERR_PTR(-ENOMEM); -+ -+ return tg; -+} -+ -+void sched_online_group(struct task_group *tg, struct task_group *parent) -+{ -+} -+ -+/* rcu callback to free various structures associated with a task group */ -+static void sched_free_group_rcu(struct rcu_head *rhp) -+{ -+ /* Now it should be safe to free those cfs_rqs */ -+ sched_free_group(container_of(rhp, struct task_group, rcu)); -+} -+ -+void sched_destroy_group(struct task_group *tg) -+{ -+ /* Wait for possible concurrent references to cfs_rqs complete */ -+ call_rcu(&tg->rcu, sched_free_group_rcu); -+} -+ -+void sched_offline_group(struct task_group *tg) -+{ -+} -+ -+static inline struct task_group *css_tg(struct cgroup_subsys_state *css) -+{ -+ return css ? container_of(css, struct task_group, css) : NULL; -+} -+ -+static struct cgroup_subsys_state * -+cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) -+{ -+ struct task_group *parent = css_tg(parent_css); -+ struct task_group *tg; -+ -+ if (!parent) { -+ /* This is early initialization for the top cgroup */ -+ return &root_task_group.css; -+ } -+ -+ tg = sched_create_group(parent); -+ if (IS_ERR(tg)) -+ return ERR_PTR(-ENOMEM); -+ return &tg->css; -+} -+ -+/* Expose task group only after completing cgroup initialization */ -+static int cpu_cgroup_css_online(struct cgroup_subsys_state *css) -+{ -+ struct task_group *tg = css_tg(css); -+ struct task_group *parent = css_tg(css->parent); -+ -+ if (parent) -+ sched_online_group(tg, parent); -+ return 0; -+} -+ -+static void cpu_cgroup_css_released(struct cgroup_subsys_state *css) -+{ -+ struct task_group *tg = css_tg(css); -+ -+ sched_offline_group(tg); -+} -+ -+static void cpu_cgroup_css_free(struct cgroup_subsys_state *css) -+{ -+ struct task_group *tg = css_tg(css); -+ -+ /* -+ * Relies on the RCU grace period between css_released() and this. 
-+ */ -+ sched_free_group(tg); -+} -+ -+static void cpu_cgroup_fork(struct task_struct *task) -+{ -+} -+ -+static int cpu_cgroup_can_attach(struct cgroup_taskset *tset) -+{ -+ return 0; -+} -+ -+static void cpu_cgroup_attach(struct cgroup_taskset *tset) -+{ -+} -+ -+static struct cftype cpu_legacy_files[] = { -+ { } /* Terminate */ -+}; -+ -+static struct cftype cpu_files[] = { -+ { } /* terminate */ -+}; -+ -+static int cpu_extra_stat_show(struct seq_file *sf, -+ struct cgroup_subsys_state *css) -+{ -+ return 0; -+} -+ -+struct cgroup_subsys cpu_cgrp_subsys = { -+ .css_alloc = cpu_cgroup_css_alloc, -+ .css_online = cpu_cgroup_css_online, -+ .css_released = cpu_cgroup_css_released, -+ .css_free = cpu_cgroup_css_free, -+ .css_extra_stat_show = cpu_extra_stat_show, -+ .fork = cpu_cgroup_fork, -+ .can_attach = cpu_cgroup_can_attach, -+ .attach = cpu_cgroup_attach, -+ .legacy_cftypes = cpu_files, -+ .legacy_cftypes = cpu_legacy_files, -+ .dfl_cftypes = cpu_files, -+ .early_init = true, -+ .threaded = true, -+}; -+#endif /* CONFIG_CGROUP_SCHED */ -+ -+#undef CREATE_TRACE_POINTS -diff --git a/kernel/sched/MuQSS.h b/kernel/sched/MuQSS.h -new file mode 100644 -index 000000000000..5214b158d82f ---- /dev/null -+++ b/kernel/sched/MuQSS.h -@@ -0,0 +1,1005 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef MUQSS_SCHED_H -+#define MUQSS_SCHED_H -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#ifdef CONFIG_PARAVIRT -+#include -+#endif -+ -+#include "cpupri.h" -+ -+#ifdef CONFIG_SCHED_DEBUG -+# define SCHED_WARN_ON(x) WARN_ONCE(x, #x) -+#else -+# define SCHED_WARN_ON(x) ((void)(x)) -+#endif -+ -+/* task_struct::on_rq states: */ -+#define TASK_ON_RQ_QUEUED 1 -+#define TASK_ON_RQ_MIGRATING 2 -+ -+struct rq; -+ -+#ifdef CONFIG_SMP -+ -+static inline bool sched_asym_prefer(int a, int b) -+{ -+ return arch_asym_cpu_priority(a) > arch_asym_cpu_priority(b); -+} -+ -+struct perf_domain { -+ struct em_perf_domain *em_pd; -+ struct perf_domain *next; -+ struct rcu_head rcu; -+}; -+ -+/* Scheduling group status flags */ -+#define SG_OVERLOAD 0x1 /* More than one runnable task on a CPU. */ -+#define SG_OVERUTILIZED 0x2 /* One or more CPUs are over-utilized. */ -+ -+/* -+ * We add the notion of a root-domain which will be used to define per-domain -+ * variables. Each exclusive cpuset essentially defines an island domain by -+ * fully partitioning the member cpus from any other cpuset. Whenever a new -+ * exclusive cpuset is created, we also create and attach a new root-domain -+ * object. -+ * -+ */ -+struct root_domain { -+ atomic_t refcount; -+ atomic_t rto_count; -+ struct rcu_head rcu; -+ cpumask_var_t span; -+ cpumask_var_t online; -+ -+ /* -+ * Indicate pullable load on at least one CPU, e.g: -+ * - More than one runnable task -+ * - Running task is misfit -+ */ -+ int overload; -+ -+ /* Indicate one or more cpus over-utilized (tipping point) */ -+ int overutilized; -+ -+ /* -+ * The bit corresponding to a CPU gets set here if such CPU has more -+ * than one runnable -deadline task (as it is below for RT tasks). 
-+ */ -+ cpumask_var_t dlo_mask; -+ atomic_t dlo_count; -+ /* Replace unused CFS structures with void */ -+ //struct dl_bw dl_bw; -+ //struct cpudl cpudl; -+ void *dl_bw; -+ void *cpudl; -+ -+ /* -+ * The "RT overload" flag: it gets set if a CPU has more than -+ * one runnable RT task. -+ */ -+ cpumask_var_t rto_mask; -+ //struct cpupri cpupri; -+ void *cpupri; -+ -+ unsigned long max_cpu_capacity; -+ -+ /* -+ * NULL-terminated list of performance domains intersecting with the -+ * CPUs of the rd. Protected by RCU. -+ */ -+ struct perf_domain *pd; -+}; -+ -+extern void init_defrootdomain(void); -+extern int sched_init_domains(const struct cpumask *cpu_map); -+extern void rq_attach_root(struct rq *rq, struct root_domain *rd); -+ -+static inline void cpupri_cleanup(void __maybe_unused *cpupri) -+{ -+} -+ -+static inline void cpudl_cleanup(void __maybe_unused *cpudl) -+{ -+} -+ -+static inline void init_dl_bw(void __maybe_unused *dl_bw) -+{ -+} -+ -+static inline int cpudl_init(void __maybe_unused *dl_bw) -+{ -+ return 0; -+} -+ -+static inline int cpupri_init(void __maybe_unused *cpupri) -+{ -+ return 0; -+} -+#endif /* CONFIG_SMP */ -+ -+/* -+ * This is the main, per-CPU runqueue data structure. -+ * This data should only be modified by the local cpu. -+ */ -+struct rq { -+ raw_spinlock_t *lock; -+ raw_spinlock_t *orig_lock; -+ -+ struct task_struct *curr, *idle, *stop; -+ struct mm_struct *prev_mm; -+ -+ unsigned int nr_running; -+ /* -+ * This is part of a global counter where only the total sum -+ * over all CPUs matters. A task can increase this counter on -+ * one CPU and if it got migrated afterwards it may decrease -+ * it on another CPU. Always updated under the runqueue lock: -+ */ -+ unsigned long nr_uninterruptible; -+ u64 nr_switches; -+ -+ /* Stored data about rq->curr to work outside rq lock */ -+ u64 rq_deadline; -+ int rq_prio; -+ -+ /* Best queued id for use outside lock */ -+ u64 best_key; -+ -+ unsigned long last_scheduler_tick; /* Last jiffy this RQ ticked */ -+ unsigned long last_jiffy; /* Last jiffy this RQ updated rq clock */ -+ u64 niffies; /* Last time this RQ updated rq clock */ -+ u64 last_niffy; /* Last niffies as updated by local clock */ -+ u64 last_jiffy_niffies; /* Niffies @ last_jiffy */ -+ -+ u64 load_update; /* When we last updated load */ -+ unsigned long load_avg; /* Rolling load average */ -+#ifdef CONFIG_HAVE_SCHED_AVG_IRQ -+ u64 irq_load_update; /* When we last updated IRQ load */ -+ unsigned long irq_load_avg; /* Rolling IRQ load average */ -+#endif -+#ifdef CONFIG_SMT_NICE -+ struct mm_struct *rq_mm; -+ int rq_smt_bias; /* Policy/nice level bias across smt siblings */ -+#endif -+ /* Accurate timekeeping data */ -+ unsigned long user_ns, nice_ns, irq_ns, softirq_ns, system_ns, -+ iowait_ns, idle_ns; -+ atomic_t nr_iowait; -+ -+#ifdef CONFIG_MEMBARRIER -+ int membarrier_state; -+#endif -+ -+ skiplist_node *node; -+ skiplist *sl; -+#ifdef CONFIG_SMP -+ struct task_struct *preempt; /* Preempt triggered on this task */ -+ struct task_struct *preempting; /* Hint only, what task is preempting */ -+ -+ int cpu; /* cpu of this runqueue */ -+ bool online; -+ -+ struct root_domain *rd; -+ struct sched_domain *sd; -+ -+ unsigned long cpu_capacity_orig; -+ -+ int *cpu_locality; /* CPU relative cache distance */ -+ struct rq **rq_order; /* Shared RQs ordered by relative cache distance */ -+ struct rq **cpu_order; /* RQs of discrete CPUs ordered by distance */ -+ -+ struct rq *smp_leader; /* First physical CPU per node */ -+#ifdef CONFIG_SCHED_SMT -+ struct rq 
*smt_leader; /* First logical CPU in SMT siblings */ -+ cpumask_t thread_mask; -+ bool (*siblings_idle)(struct rq *rq); -+ /* See if all smt siblings are idle */ -+#endif /* CONFIG_SCHED_SMT */ -+#ifdef CONFIG_SCHED_MC -+ struct rq *mc_leader; /* First logical CPU in MC siblings */ -+ cpumask_t core_mask; -+ bool (*cache_idle)(struct rq *rq); -+ /* See if all cache siblings are idle */ -+#endif /* CONFIG_SCHED_MC */ -+#endif /* CONFIG_SMP */ -+#ifdef CONFIG_IRQ_TIME_ACCOUNTING -+ u64 prev_irq_time; -+#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ -+#ifdef CONFIG_PARAVIRT -+ u64 prev_steal_time; -+#endif /* CONFIG_PARAVIRT */ -+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING -+ u64 prev_steal_time_rq; -+#endif /* CONFIG_PARAVIRT_TIME_ACCOUNTING */ -+ -+ u64 clock, old_clock, last_tick; -+ /* Ensure that all clocks are in the same cache line */ -+ u64 clock_task ____cacheline_aligned; -+ int dither; -+ -+ int iso_ticks; -+ bool iso_refractory; -+ -+#ifdef CONFIG_HIGH_RES_TIMERS -+ struct hrtimer hrexpiry_timer; -+#endif -+ -+ int rt_nr_running; /* Number real time tasks running */ -+#ifdef CONFIG_SCHEDSTATS -+ -+ /* latency stats */ -+ struct sched_info rq_sched_info; -+ unsigned long long rq_cpu_time; -+ /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */ -+ -+ /* sys_sched_yield() stats */ -+ unsigned int yld_count; -+ -+ /* schedule() stats */ -+ unsigned int sched_switch; -+ unsigned int sched_count; -+ unsigned int sched_goidle; -+ -+ /* try_to_wake_up() stats */ -+ unsigned int ttwu_count; -+ unsigned int ttwu_local; -+#endif /* CONFIG_SCHEDSTATS */ -+ -+#ifdef CONFIG_SMP -+ struct llist_head wake_list; -+#endif -+ -+#ifdef CONFIG_CPU_IDLE -+ /* Must be inspected within a rcu lock section */ -+ struct cpuidle_state *idle_state; -+#endif -+}; -+ -+struct rq_flags { -+ unsigned long flags; -+}; -+ -+#ifdef CONFIG_SMP -+struct rq *cpu_rq(int cpu); -+#endif -+ -+#ifndef CONFIG_SMP -+extern struct rq *uprq; -+#define cpu_rq(cpu) (uprq) -+#define this_rq() (uprq) -+#define raw_rq() (uprq) -+#define task_rq(p) (uprq) -+#define cpu_curr(cpu) ((uprq)->curr) -+#else /* CONFIG_SMP */ -+DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); -+#define this_rq() this_cpu_ptr(&runqueues) -+#define raw_rq() raw_cpu_ptr(&runqueues) -+#define task_rq(p) cpu_rq(task_cpu(p)) -+#endif /* CONFIG_SMP */ -+ -+static inline int task_current(struct rq *rq, struct task_struct *p) -+{ -+ return rq->curr == p; -+} -+ -+static inline int task_running(struct rq *rq, struct task_struct *p) -+{ -+#ifdef CONFIG_SMP -+ return p->on_cpu; -+#else -+ return task_current(rq, p); -+#endif -+} -+ -+static inline int task_on_rq_queued(struct task_struct *p) -+{ -+ return p->on_rq == TASK_ON_RQ_QUEUED; -+} -+ -+static inline int task_on_rq_migrating(struct task_struct *p) -+{ -+ return READ_ONCE(p->on_rq) == TASK_ON_RQ_MIGRATING; -+} -+ -+static inline void rq_lock(struct rq *rq) -+ __acquires(rq->lock) -+{ -+ raw_spin_lock(rq->lock); -+} -+ -+static inline void rq_unlock(struct rq *rq) -+ __releases(rq->lock) -+{ -+ raw_spin_unlock(rq->lock); -+} -+ -+static inline void rq_lock_irq(struct rq *rq) -+ __acquires(rq->lock) -+{ -+ raw_spin_lock_irq(rq->lock); -+} -+ -+static inline void rq_unlock_irq(struct rq *rq, struct rq_flags __always_unused *rf) -+ __releases(rq->lock) -+{ -+ raw_spin_unlock_irq(rq->lock); -+} -+ -+static inline void rq_lock_irqsave(struct rq *rq, struct rq_flags *rf) -+ __acquires(rq->lock) -+{ -+ raw_spin_lock_irqsave(rq->lock, rf->flags); -+} -+ -+static inline void rq_unlock_irqrestore(struct rq *rq, 
struct rq_flags *rf) -+ __releases(rq->lock) -+{ -+ raw_spin_unlock_irqrestore(rq->lock, rf->flags); -+} -+ -+static inline struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) -+ __acquires(p->pi_lock) -+ __acquires(rq->lock) -+{ -+ struct rq *rq; -+ -+ while (42) { -+ raw_spin_lock_irqsave(&p->pi_lock, rf->flags); -+ rq = task_rq(p); -+ raw_spin_lock(rq->lock); -+ if (likely(rq == task_rq(p))) -+ break; -+ raw_spin_unlock(rq->lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); -+ } -+ return rq; -+} -+ -+static inline void task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf) -+ __releases(rq->lock) -+ __releases(p->pi_lock) -+{ -+ rq_unlock(rq); -+ raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); -+} -+ -+static inline struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags __always_unused *rf) -+ __acquires(rq->lock) -+{ -+ struct rq *rq; -+ -+ lockdep_assert_held(&p->pi_lock); -+ -+ while (42) { -+ rq = task_rq(p); -+ raw_spin_lock(rq->lock); -+ if (likely(rq == task_rq(p))) -+ break; -+ raw_spin_unlock(rq->lock); -+ } -+ return rq; -+} -+ -+static inline void __task_rq_unlock(struct rq *rq, struct rq_flags __always_unused *rf) -+{ -+ rq_unlock(rq); -+} -+ -+static inline struct rq * -+this_rq_lock_irq(struct rq_flags *rf) -+ __acquires(rq->lock) -+{ -+ struct rq *rq; -+ -+ local_irq_disable(); -+ rq = this_rq(); -+ rq_lock(rq); -+ return rq; -+} -+ -+/* -+ * {de,en}queue flags: Most not used on MuQSS. -+ * -+ * DEQUEUE_SLEEP - task is no longer runnable -+ * ENQUEUE_WAKEUP - task just became runnable -+ * -+ * SAVE/RESTORE - an otherwise spurious dequeue/enqueue, done to ensure tasks -+ * are in a known state which allows modification. Such pairs -+ * should preserve as much state as possible. -+ * -+ * MOVE - paired with SAVE/RESTORE, explicitly does not preserve the location -+ * in the runqueue. 
-+ * -+ * ENQUEUE_HEAD - place at front of runqueue (tail if not specified) -+ * ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline) -+ * ENQUEUE_MIGRATED - the task was migrated during wakeup -+ * -+ */ -+ -+#define DEQUEUE_SLEEP 0x01 -+#define DEQUEUE_SAVE 0x02 /* matches ENQUEUE_RESTORE */ -+ -+#define ENQUEUE_WAKEUP 0x01 -+#define ENQUEUE_RESTORE 0x02 -+ -+#ifdef CONFIG_SMP -+#define ENQUEUE_MIGRATED 0x40 -+#else -+#define ENQUEUE_MIGRATED 0x00 -+#endif -+ -+static inline u64 __rq_clock_broken(struct rq *rq) -+{ -+ return READ_ONCE(rq->clock); -+} -+ -+static inline u64 rq_clock(struct rq *rq) -+{ -+ lockdep_assert_held(rq->lock); -+ -+ return rq->clock; -+} -+ -+static inline u64 rq_clock_task(struct rq *rq) -+{ -+ lockdep_assert_held(rq->lock); -+ -+ return rq->clock_task; -+} -+ -+#ifdef CONFIG_NUMA -+enum numa_topology_type { -+ NUMA_DIRECT, -+ NUMA_GLUELESS_MESH, -+ NUMA_BACKPLANE, -+}; -+extern enum numa_topology_type sched_numa_topology_type; -+extern int sched_max_numa_distance; -+extern bool find_numa_distance(int distance); -+extern void sched_init_numa(void); -+extern void sched_domains_numa_masks_set(unsigned int cpu); -+extern void sched_domains_numa_masks_clear(unsigned int cpu); -+extern int sched_numa_find_closest(const struct cpumask *cpus, int cpu); -+#else -+static inline void sched_init_numa(void) { } -+static inline void sched_domains_numa_masks_set(unsigned int cpu) { } -+static inline void sched_domains_numa_masks_clear(unsigned int cpu) { } -+static inline int sched_numa_find_closest(const struct cpumask *cpus, int cpu) -+{ -+ return nr_cpu_ids; -+} -+#endif -+ -+extern struct mutex sched_domains_mutex; -+extern struct static_key_false sched_schedstats; -+ -+#define rcu_dereference_check_sched_domain(p) \ -+ rcu_dereference_check((p), \ -+ lockdep_is_held(&sched_domains_mutex)) -+ -+#ifdef CONFIG_SMP -+ -+/* -+ * The domain tree (rq->sd) is protected by RCU's quiescent state transition. -+ * See destroy_sched_domains: call_rcu for details. -+ * -+ * The domain tree of any CPU may only be accessed from within -+ * preempt-disabled sections. -+ */ -+#define for_each_domain(cpu, __sd) \ -+ for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); \ -+ __sd; __sd = __sd->parent) -+ -+#define for_each_lower_domain(sd) for (; sd; sd = sd->child) -+ -+/** -+ * highest_flag_domain - Return highest sched_domain containing flag. -+ * @cpu: The cpu whose highest level of sched domain is to -+ * be returned. -+ * @flag: The flag to check for the highest sched_domain -+ * for the given cpu. -+ * -+ * Returns the highest sched_domain of a cpu which contains the given flag. 
-+ */ -+static inline struct sched_domain *highest_flag_domain(int cpu, int flag) -+{ -+ struct sched_domain *sd, *hsd = NULL; -+ -+ for_each_domain(cpu, sd) { -+ if (!(sd->flags & flag)) -+ break; -+ hsd = sd; -+ } -+ -+ return hsd; -+} -+ -+static inline struct sched_domain *lowest_flag_domain(int cpu, int flag) -+{ -+ struct sched_domain *sd; -+ -+ for_each_domain(cpu, sd) { -+ if (sd->flags & flag) -+ break; -+ } -+ -+ return sd; -+} -+ -+DECLARE_PER_CPU(struct sched_domain *, sd_llc); -+DECLARE_PER_CPU(int, sd_llc_size); -+DECLARE_PER_CPU(int, sd_llc_id); -+DECLARE_PER_CPU(struct sched_domain_shared *, sd_llc_shared); -+DECLARE_PER_CPU(struct sched_domain *, sd_numa); -+DECLARE_PER_CPU(struct sched_domain *, sd_asym_packing); -+DECLARE_PER_CPU(struct sched_domain *, sd_asym_cpucapacity); -+ -+struct sched_group_capacity { -+ atomic_t ref; -+ /* -+ * CPU capacity of this group, SCHED_CAPACITY_SCALE being max capacity -+ * for a single CPU. -+ */ -+ unsigned long capacity; -+ unsigned long min_capacity; /* Min per-CPU capacity in group */ -+ unsigned long max_capacity; /* Max per-CPU capacity in group */ -+ unsigned long next_update; -+ int imbalance; /* XXX unrelated to capacity but shared group state */ -+ -+#ifdef CONFIG_SCHED_DEBUG -+ int id; -+#endif -+ -+ unsigned long cpumask[0]; /* balance mask */ -+}; -+ -+struct sched_group { -+ struct sched_group *next; /* Must be a circular list */ -+ atomic_t ref; -+ -+ unsigned int group_weight; -+ struct sched_group_capacity *sgc; -+ int asym_prefer_cpu; /* cpu of highest priority in group */ -+ -+ /* -+ * The CPUs this group covers. -+ * -+ * NOTE: this field is variable length. (Allocated dynamically -+ * by attaching extra space to the end of the structure, -+ * depending on how many CPUs the kernel has booted up with) -+ */ -+ unsigned long cpumask[0]; -+}; -+ -+static inline struct cpumask *sched_group_span(struct sched_group *sg) -+{ -+ return to_cpumask(sg->cpumask); -+} -+ -+/* -+ * See build_balance_mask(). -+ */ -+static inline struct cpumask *group_balance_mask(struct sched_group *sg) -+{ -+ return to_cpumask(sg->sgc->cpumask); -+} -+ -+/** -+ * group_first_cpu - Returns the first cpu in the cpumask of a sched_group. -+ * @group: The group whose first cpu is to be returned. 
-+ */ -+static inline unsigned int group_first_cpu(struct sched_group *group) -+{ -+ return cpumask_first(sched_group_span(group)); -+} -+ -+ -+#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) -+void register_sched_domain_sysctl(void); -+void dirty_sched_domain_sysctl(int cpu); -+void unregister_sched_domain_sysctl(void); -+#else -+static inline void register_sched_domain_sysctl(void) -+{ -+} -+static inline void dirty_sched_domain_sysctl(int cpu) -+{ -+} -+static inline void unregister_sched_domain_sysctl(void) -+{ -+} -+#endif -+ -+extern void sched_ttwu_pending(void); -+extern void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask); -+extern void set_rq_online (struct rq *rq); -+extern void set_rq_offline(struct rq *rq); -+extern bool sched_smp_initialized; -+ -+static inline void update_group_capacity(struct sched_domain *sd, int cpu) -+{ -+} -+ -+static inline void trigger_load_balance(struct rq *rq) -+{ -+} -+ -+#define sched_feat(x) 0 -+ -+#else /* CONFIG_SMP */ -+ -+static inline void sched_ttwu_pending(void) { } -+ -+#endif /* CONFIG_SMP */ -+ -+#ifdef CONFIG_CPU_IDLE -+static inline void idle_set_state(struct rq *rq, -+ struct cpuidle_state *idle_state) -+{ -+ rq->idle_state = idle_state; -+} -+ -+static inline struct cpuidle_state *idle_get_state(struct rq *rq) -+{ -+ SCHED_WARN_ON(!rcu_read_lock_held()); -+ return rq->idle_state; -+} -+#else -+static inline void idle_set_state(struct rq *rq, -+ struct cpuidle_state *idle_state) -+{ -+} -+ -+static inline struct cpuidle_state *idle_get_state(struct rq *rq) -+{ -+ return NULL; -+} -+#endif -+ -+#ifdef CONFIG_SCHED_DEBUG -+extern bool sched_debug_enabled; -+#endif -+ -+extern void schedule_idle(void); -+ -+#ifdef CONFIG_IRQ_TIME_ACCOUNTING -+struct irqtime { -+ u64 total; -+ u64 tick_delta; -+ u64 irq_start_time; -+ struct u64_stats_sync sync; -+}; -+ -+DECLARE_PER_CPU(struct irqtime, cpu_irqtime); -+ -+/* -+ * Returns the irqtime minus the softirq time computed by ksoftirqd. -+ * Otherwise ksoftirqd's sum_exec_runtime is substracted its own runtime -+ * and never move forward. 
-+ */ -+static inline u64 irq_time_read(int cpu) -+{ -+ struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu); -+ unsigned int seq; -+ u64 total; -+ -+ do { -+ seq = __u64_stats_fetch_begin(&irqtime->sync); -+ total = irqtime->total; -+ } while (__u64_stats_fetch_retry(&irqtime->sync, seq)); -+ -+ return total; -+} -+#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ -+ -+static inline bool sched_stop_runnable(struct rq *rq) -+{ -+ return rq->stop && task_on_rq_queued(rq->stop); -+} -+ -+#ifdef CONFIG_SMP -+static inline int cpu_of(struct rq *rq) -+{ -+ return rq->cpu; -+} -+#else /* CONFIG_SMP */ -+static inline int cpu_of(struct rq *rq) -+{ -+ return 0; -+} -+#endif -+ -+#ifdef CONFIG_CPU_FREQ -+DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); -+ -+static inline void cpufreq_trigger(struct rq *rq, unsigned int flags) -+{ -+ struct update_util_data *data; -+ -+ data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data, -+ cpu_of(rq))); -+ -+ if (data) -+ data->func(data, rq->niffies, flags); -+} -+#else -+static inline void cpufreq_trigger(struct rq *rq, unsigned int flag) -+{ -+} -+#endif /* CONFIG_CPU_FREQ */ -+ -+static __always_inline -+unsigned int uclamp_util_with(struct rq __maybe_unused *rq, unsigned int util, -+ struct task_struct __maybe_unused *p) -+{ -+ return util; -+} -+ -+static inline unsigned int uclamp_util(struct rq *rq, unsigned int util) -+{ -+ return util; -+} -+ -+#ifdef arch_scale_freq_capacity -+#ifndef arch_scale_freq_invariant -+#define arch_scale_freq_invariant() (true) -+#endif -+#else /* arch_scale_freq_capacity */ -+#define arch_scale_freq_invariant() (false) -+#endif -+ -+/* -+ * This should only be called when current == rq->idle. Dodgy workaround for -+ * when softirqs are pending and we are in the idle loop. Setting current to -+ * resched will kick us out of the idle loop and the softirqs will be serviced -+ * on our next pass through schedule(). -+ */ -+static inline bool softirq_pending(int cpu) -+{ -+ if (likely(!local_softirq_pending())) -+ return false; -+ set_tsk_need_resched(current); -+ return true; -+} -+ -+#ifdef CONFIG_64BIT -+static inline u64 read_sum_exec_runtime(struct task_struct *t) -+{ -+ return tsk_seruntime(t); -+} -+#else -+static inline u64 read_sum_exec_runtime(struct task_struct *t) -+{ -+ struct rq_flags rf; -+ u64 ns; -+ struct rq *rq; -+ -+ rq = task_rq_lock(t, &rf); -+ ns = tsk_seruntime(t); -+ task_rq_unlock(rq, t, &rf); -+ -+ return ns; -+} -+#endif -+ -+#ifndef arch_scale_freq_capacity -+static __always_inline -+unsigned long arch_scale_freq_capacity(int cpu) -+{ -+ return SCHED_CAPACITY_SCALE; -+} -+#endif -+ -+#ifdef CONFIG_NO_HZ_FULL -+extern bool sched_can_stop_tick(struct rq *rq); -+extern int __init sched_tick_offload_init(void); -+ -+/* -+ * Tick may be needed by tasks in the runqueue depending on their policy and -+ * requirements. If tick is needed, lets send the target an IPI to kick it out of -+ * nohz mode if necessary. 
-+ */ -+static inline void sched_update_tick_dependency(struct rq *rq) -+{ -+ int cpu; -+ -+ if (!tick_nohz_full_enabled()) -+ return; -+ -+ cpu = cpu_of(rq); -+ -+ if (!tick_nohz_full_cpu(cpu)) -+ return; -+ -+ if (sched_can_stop_tick(rq)) -+ tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED); -+ else -+ tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED); -+} -+#else -+static inline int sched_tick_offload_init(void) { return 0; } -+static inline void sched_update_tick_dependency(struct rq *rq) { } -+#endif -+ -+#define SCHED_FLAG_SUGOV 0x10000000 -+ -+static inline bool rt_rq_is_runnable(struct rq *rt_rq) -+{ -+ return rt_rq->rt_nr_running; -+} -+ -+/** -+ * enum schedutil_type - CPU utilization type -+ * @FREQUENCY_UTIL: Utilization used to select frequency -+ * @ENERGY_UTIL: Utilization used during energy calculation -+ * -+ * The utilization signals of all scheduling classes (CFS/RT/DL) and IRQ time -+ * need to be aggregated differently depending on the usage made of them. This -+ * enum is used within schedutil_freq_util() to differentiate the types of -+ * utilization expected by the callers, and adjust the aggregation accordingly. -+ */ -+enum schedutil_type { -+ FREQUENCY_UTIL, -+ ENERGY_UTIL, -+}; -+ -+#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL -+ -+unsigned long schedutil_cpu_util(int cpu, unsigned long util_cfs, -+ unsigned long max, enum schedutil_type type, -+ struct task_struct *p); -+ -+static inline unsigned long cpu_bw_dl(struct rq *rq) -+{ -+ return 0; -+} -+ -+static inline unsigned long cpu_util_dl(struct rq *rq) -+{ -+ return 0; -+} -+ -+static inline unsigned long cpu_util_cfs(struct rq *rq) -+{ -+ unsigned long ret = READ_ONCE(rq->load_avg); -+ -+ if (ret > SCHED_CAPACITY_SCALE) -+ ret = SCHED_CAPACITY_SCALE; -+ return ret; -+} -+ -+static inline unsigned long cpu_util_rt(struct rq *rq) -+{ -+ unsigned long ret = READ_ONCE(rq->rt_nr_running); -+ -+ if (ret > SCHED_CAPACITY_SCALE) -+ ret = SCHED_CAPACITY_SCALE; -+ return ret; -+} -+ -+#ifdef CONFIG_HAVE_SCHED_AVG_IRQ -+static inline unsigned long cpu_util_irq(struct rq *rq) -+{ -+ unsigned long ret = READ_ONCE(rq->irq_load_avg); -+ -+ if (ret > SCHED_CAPACITY_SCALE) -+ ret = SCHED_CAPACITY_SCALE; -+ return ret; -+} -+ -+static inline -+unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned long max) -+{ -+ util *= (max - irq); -+ util /= max; -+ -+ return util; -+ -+} -+#else -+static inline unsigned long cpu_util_irq(struct rq *rq) -+{ -+ return 0; -+} -+ -+static inline -+unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned long max) -+{ -+ return util; -+} -+#endif -+#endif -+ -+#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) -+#define perf_domain_span(pd) (to_cpumask(((pd)->em_pd->cpus))) -+ -+DECLARE_STATIC_KEY_FALSE(sched_energy_present); -+ -+static inline bool sched_energy_enabled(void) -+{ -+ return static_branch_unlikely(&sched_energy_present); -+} -+ -+#else /* ! (CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL) */ -+ -+#define perf_domain_span(pd) NULL -+static inline bool sched_energy_enabled(void) { return false; } -+ -+#endif /* CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL */ -+ -+#ifdef CONFIG_MEMBARRIER -+/* -+ * The scheduler provides memory barriers required by membarrier between: -+ * - prior user-space memory accesses and store to rq->membarrier_state, -+ * - store to rq->membarrier_state and following user-space memory accesses. -+ * In the same way it provides those guarantees around store to rq->curr. 
-+ */ -+static inline void membarrier_switch_mm(struct rq *rq, -+ struct mm_struct *prev_mm, -+ struct mm_struct *next_mm) -+{ -+ int membarrier_state; -+ -+ if (prev_mm == next_mm) -+ return; -+ -+ membarrier_state = atomic_read(&next_mm->membarrier_state); -+ if (READ_ONCE(rq->membarrier_state) == membarrier_state) -+ return; -+ -+ WRITE_ONCE(rq->membarrier_state, membarrier_state); -+} -+#else -+static inline void membarrier_switch_mm(struct rq *rq, -+ struct mm_struct *prev_mm, -+ struct mm_struct *next_mm) -+{ -+} -+#endif -+ -+#endif /* MUQSS_SCHED_H */ -diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c -index 86800b4d5453..f3d8dca0538a 100644 ---- a/kernel/sched/cpufreq_schedutil.c -+++ b/kernel/sched/cpufreq_schedutil.c -@@ -185,6 +185,12 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy, - return cpufreq_driver_resolve_freq(policy, freq); - } - -+#ifdef CONFIG_SCHED_MUQSS -+#define rt_rq_runnable(rq_rt) rt_rq_is_runnable(rq) -+#else -+#define rt_rq_runnable(rq_rt) rt_rq_is_runnable(&rq->rt) -+#endif -+ - /* - * This function computes an effective utilization for the given CPU, to be - * used for frequency selection given the linear relation: f = u * f_max. -@@ -213,7 +219,7 @@ unsigned long schedutil_cpu_util(int cpu, unsigned long util_cfs, - struct rq *rq = cpu_rq(cpu); - - if (!IS_BUILTIN(CONFIG_UCLAMP_TASK) && -- type == FREQUENCY_UTIL && rt_rq_is_runnable(&rq->rt)) { -+ type == FREQUENCY_UTIL && rt_rq_runnable(rq)) { - return max; - } - -@@ -658,7 +664,11 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy) - struct task_struct *thread; - struct sched_attr attr = { - .size = sizeof(struct sched_attr), -+#ifdef CONFIG_SCHED_MUQSS -+ .sched_policy = SCHED_RR, -+#else - .sched_policy = SCHED_DEADLINE, -+#endif - .sched_flags = SCHED_FLAG_SUGOV, - .sched_nice = 0, - .sched_priority = 0, -diff --git a/kernel/sched/cpupri.h b/kernel/sched/cpupri.h -index 7dc20a3232e7..e733a0a53b0a 100644 ---- a/kernel/sched/cpupri.h -+++ b/kernel/sched/cpupri.h -@@ -17,9 +17,11 @@ struct cpupri { - int *cpu_to_pri; - }; - -+#ifndef CONFIG_SCHED_MUQSS - #ifdef CONFIG_SMP - int cpupri_find(struct cpupri *cp, struct task_struct *p, struct cpumask *lowest_mask); - void cpupri_set(struct cpupri *cp, int cpu, int pri); - int cpupri_init(struct cpupri *cp); - void cpupri_cleanup(struct cpupri *cp); - #endif -+#endif -diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c -index 46ed4e1383e2..f077fcd22d2b 100644 ---- a/kernel/sched/cputime.c -+++ b/kernel/sched/cputime.c -@@ -266,26 +266,6 @@ static inline u64 account_other_time(u64 max) - return accounted; - } - --#ifdef CONFIG_64BIT --static inline u64 read_sum_exec_runtime(struct task_struct *t) --{ -- return t->se.sum_exec_runtime; --} --#else --static u64 read_sum_exec_runtime(struct task_struct *t) --{ -- u64 ns; -- struct rq_flags rf; -- struct rq *rq; -- -- rq = task_rq_lock(t, &rf); -- ns = t->se.sum_exec_runtime; -- task_rq_unlock(rq, t, &rf); -- -- return ns; --} --#endif -- - /* - * Accumulate raw cputime values of dead tasks (sig->[us]time) and live - * tasks (sum on group iteration) belonging to @tsk's group. 
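The hunks immediately above and below this point share one pattern: code that must compile against either scheduler goes through a small compatibility wrapper (rt_rq_runnable(), tsk_seruntime(), rq_rt_nr_running(), read_sum_exec_runtime()), so MuQSS's flat runqueue fields and CFS's per-class sub-structures are reached through a single name. The following is a minimal, self-contained sketch of that pattern only; it compiles as ordinary userspace C and every name in it is invented for illustration, not taken from the patch.

#include <stdio.h>

struct demo_rt_rq { int rt_nr_running; };

#ifdef DEMO_MUQSS_LAYOUT
/* MuQSS-style layout: a single flat counter lives directly in the runqueue. */
struct demo_rq { int rt_nr_running; };
# define demo_rt_runnable(rq) ((rq)->rt_nr_running)
#else
/* CFS-style layout: the counter is nested inside a per-class sub-structure. */
struct demo_rq { struct demo_rt_rq rt; };
# define demo_rt_runnable(rq) ((rq)->rt.rt_nr_running)
#endif

/* Shared code only ever uses the wrapper, so it builds against either layout. */
int main(void)
{
	struct demo_rq rq = { 0 };

	printf("runnable RT tasks: %d\n", demo_rt_runnable(&rq));
	return 0;
}

Building the sketch with and without -DDEMO_MUQSS_LAYOUT exercises both layouts; the real patch makes the same choice with CONFIG_SCHED_MUQSS.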
-@@ -663,7 +643,7 @@ void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev, - void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) - { - struct task_cputime cputime = { -- .sum_exec_runtime = p->se.sum_exec_runtime, -+ .sum_exec_runtime = tsk_seruntime(p), - }; - - task_cputime(p, &cputime.utime, &cputime.stime); -diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c -index f65ef1e2f204..e0aa6c73a5fa 100644 ---- a/kernel/sched/idle.c -+++ b/kernel/sched/idle.c -@@ -225,6 +225,8 @@ static void cpuidle_idle_call(void) - static void do_idle(void) - { - int cpu = smp_processor_id(); -+ bool pending = false; -+ - /* - * If the arch has a polling bit, we maintain an invariant: - * -@@ -235,7 +237,10 @@ static void do_idle(void) - */ - - __current_set_polling(); -- tick_nohz_idle_enter(); -+ if (unlikely(softirq_pending(cpu))) -+ pending = true; -+ else -+ tick_nohz_idle_enter(); - - while (!need_resched()) { - rmb(); -@@ -273,7 +278,8 @@ static void do_idle(void) - * an IPI to fold the state for us. - */ - preempt_set_need_resched(); -- tick_nohz_idle_exit(); -+ if (!pending) -+ tick_nohz_idle_exit(); - __current_clr_polling(); - - /* -@@ -355,6 +361,7 @@ void cpu_startup_entry(enum cpuhp_state state) - do_idle(); - } - -+#ifndef CONFIG_SCHED_MUQSS - /* - * idle-task scheduling class. - */ -@@ -479,3 +486,4 @@ const struct sched_class idle_sched_class = { - .switched_to = switched_to_idle, - .update_curr = update_curr_idle, - }; -+#endif /* CONFIG_SCHED_MUQSS */ -diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h -index c8870c5bd7df..add1d74c2e91 100644 ---- a/kernel/sched/sched.h -+++ b/kernel/sched/sched.h -@@ -2,6 +2,19 @@ - /* - * Scheduler internal types and methods: - */ -+#ifdef CONFIG_SCHED_MUQSS -+#include "MuQSS.h" -+ -+/* Begin compatibility wrappers for MuQSS/CFS differences */ -+#define rq_rt_nr_running(rq) ((rq)->rt_nr_running) -+#define rq_h_nr_running(rq) ((rq)->nr_running) -+ -+#else /* CONFIG_SCHED_MUQSS */ -+ -+#define rq_rt_nr_running(rq) ((rq)->rt.rt_nr_running) -+#define rq_h_nr_running(rq) ((rq)->cfs.h_nr_running) -+ -+ - #include - - #include -@@ -2496,3 +2509,30 @@ static inline void membarrier_switch_mm(struct rq *rq, - { - } - #endif -+ -+/* MuQSS compatibility functions */ -+static inline bool softirq_pending(int cpu) -+{ -+ return false; -+} -+ -+#ifdef CONFIG_64BIT -+static inline u64 read_sum_exec_runtime(struct task_struct *t) -+{ -+ return t->se.sum_exec_runtime; -+} -+#else -+static inline u64 read_sum_exec_runtime(struct task_struct *t) -+{ -+ u64 ns; -+ struct rq_flags rf; -+ struct rq *rq; -+ -+ rq = task_rq_lock(t, &rf); -+ ns = t->se.sum_exec_runtime; -+ task_rq_unlock(rq, t, &rf); -+ -+ return ns; -+} -+#endif -+#endif /* CONFIG_SCHED_MUQSS */ -diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c -index 49b835f1305f..0253ea846c0d 100644 ---- a/kernel/sched/topology.c -+++ b/kernel/sched/topology.c -@@ -3,6 +3,7 @@ - * Scheduler topology setup/handling methods - */ - #include "sched.h" -+#include "linux/sched/deadline.h" - - DEFINE_MUTEX(sched_domains_mutex); - -@@ -442,7 +443,11 @@ void rq_attach_root(struct rq *rq, struct root_domain *rd) - struct root_domain *old_rd = NULL; - unsigned long flags; - -+#ifdef CONFIG_SCHED_MUQSS -+ raw_spin_lock_irqsave(rq->lock, flags); -+#else - raw_spin_lock_irqsave(&rq->lock, flags); -+#endif - - if (rq->rd) { - old_rd = rq->rd; -@@ -468,7 +473,11 @@ void rq_attach_root(struct rq *rq, struct root_domain *rd) - if (cpumask_test_cpu(rq->cpu, cpu_active_mask)) - 
set_rq_online(rq); - -+#ifdef CONFIG_SCHED_MUQSS -+ raw_spin_unlock_irqrestore(rq->lock, flags); -+#else - raw_spin_unlock_irqrestore(&rq->lock, flags); -+#endif - - if (old_rd) - call_rcu(&old_rd->rcu, free_rootdomain); -diff --git a/kernel/skip_list.c b/kernel/skip_list.c -new file mode 100644 -index 000000000000..bf5c6e97e139 ---- /dev/null -+++ b/kernel/skip_list.c -@@ -0,0 +1,148 @@ -+/* -+ Copyright (C) 2011,2016 Con Kolivas. -+ -+ Code based on example originally by William Pugh. -+ -+Skip Lists are a probabilistic alternative to balanced trees, as -+described in the June 1990 issue of CACM and were invented by -+William Pugh in 1987. -+ -+A couple of comments about this implementation: -+The routine randomLevel has been hard-coded to generate random -+levels using p=0.25. It can be easily changed. -+ -+The insertion routine has been implemented so as to use the -+dirty hack described in the CACM paper: if a random level is -+generated that is more than the current maximum level, the -+current maximum level plus one is used instead. -+ -+Levels start at zero and go up to MaxLevel (which is equal to -+MaxNumberOfLevels-1). -+ -+The routines defined in this file are: -+ -+init: defines slnode -+ -+new_skiplist: returns a new, empty list -+ -+randomLevel: Returns a random level based on a u64 random seed passed to it. -+In MuQSS, the "niffy" time is used for this purpose. -+ -+insert(l,key, value): inserts the binding (key, value) into l. This operation -+occurs in O(log n) time. -+ -+delnode(slnode, l, node): deletes any binding of key from the l based on the -+actual node value. This operation occurs in O(k) time where k is the -+number of levels of the node in question (max 8). The original delete -+function occurred in O(log n) time and involved a search. -+ -+MuQSS Notes: In this implementation of skiplists, there are bidirectional -+next/prev pointers and the insert function returns a pointer to the actual -+node the value is stored. The key here is chosen by the scheduler so as to -+sort tasks according to the priority list requirements and is no longer used -+by the scheduler after insertion. The scheduler lookup, however, occurs in -+O(1) time because it is always the first item in the level 0 linked list. -+Since the task struct stores a copy of the node pointer upon skiplist_insert, -+it can also remove it much faster than the original implementation with the -+aid of prev<->next pointer manipulation and no searching. 
-+ -+*/ -+ -+#include -+#include -+ -+#define MaxNumberOfLevels 8 -+#define MaxLevel (MaxNumberOfLevels - 1) -+ -+void skiplist_init(skiplist_node *slnode) -+{ -+ int i; -+ -+ slnode->key = 0xFFFFFFFFFFFFFFFF; -+ slnode->level = 0; -+ slnode->value = NULL; -+ for (i = 0; i < MaxNumberOfLevels; i++) -+ slnode->next[i] = slnode->prev[i] = slnode; -+} -+ -+skiplist *new_skiplist(skiplist_node *slnode) -+{ -+ skiplist *l = kzalloc(sizeof(skiplist), GFP_ATOMIC); -+ -+ BUG_ON(!l); -+ l->header = slnode; -+ return l; -+} -+ -+void free_skiplist(skiplist *l) -+{ -+ skiplist_node *p, *q; -+ -+ p = l->header; -+ do { -+ q = p->next[0]; -+ p->next[0]->prev[0] = q->prev[0]; -+ skiplist_node_init(p); -+ p = q; -+ } while (p != l->header); -+ kfree(l); -+} -+ -+void skiplist_node_init(skiplist_node *node) -+{ -+ memset(node, 0, sizeof(skiplist_node)); -+} -+ -+static inline unsigned int randomLevel(const long unsigned int randseed) -+{ -+ return find_first_bit(&randseed, MaxLevel) / 2; -+} -+ -+void skiplist_insert(skiplist *l, skiplist_node *node, keyType key, valueType value, unsigned int randseed) -+{ -+ skiplist_node *update[MaxNumberOfLevels]; -+ skiplist_node *p, *q; -+ int k = l->level; -+ -+ p = l->header; -+ do { -+ while (q = p->next[k], q->key <= key) -+ p = q; -+ update[k] = p; -+ } while (--k >= 0); -+ -+ ++l->entries; -+ k = randomLevel(randseed); -+ if (k > l->level) { -+ k = ++l->level; -+ update[k] = l->header; -+ } -+ -+ node->level = k; -+ node->key = key; -+ node->value = value; -+ do { -+ p = update[k]; -+ node->next[k] = p->next[k]; -+ p->next[k] = node; -+ node->prev[k] = p; -+ node->next[k]->prev[k] = node; -+ } while (--k >= 0); -+} -+ -+void skiplist_delete(skiplist *l, skiplist_node *node) -+{ -+ int k, m = node->level; -+ -+ for (k = 0; k <= m; k++) { -+ node->prev[k]->next[k] = node->next[k]; -+ node->next[k]->prev[k] = node->prev[k]; -+ } -+ skiplist_node_init(node); -+ if (m == l->level) { -+ while (l->header->next[m] == l->header && l->header->prev[m] == l->header && m > 0) -+ m--; -+ l->level = m; -+ } -+ l->entries--; -+} -diff --git a/kernel/sysctl.c b/kernel/sysctl.c -index b6f2f35d0bcf..72065ca9a8e2 100644 ---- a/kernel/sysctl.c -+++ b/kernel/sysctl.c -@@ -130,8 +130,16 @@ static int __maybe_unused four = 4; - static unsigned long zero_ul; - static unsigned long one_ul = 1; - static unsigned long long_max = LONG_MAX; --static int one_hundred = 100; --static int one_thousand = 1000; -+static int __read_mostly one_hundred = 100; -+static int __read_mostly one_thousand = 1000; -+#ifdef CONFIG_SCHED_MUQSS -+static int zero = 0; -+static int one = 1; -+extern int rr_interval; -+extern int sched_interactive; -+extern int sched_iso_cpu; -+extern int sched_yield_type; -+#endif - #ifdef CONFIG_PRINTK - static int ten_thousand = 10000; - #endif -@@ -300,7 +308,7 @@ static struct ctl_table sysctl_base_table[] = { - { } - }; - --#ifdef CONFIG_SCHED_DEBUG -+#if defined(CONFIG_SCHED_DEBUG) && !defined(CONFIG_SCHED_MUQSS) - static int min_sched_granularity_ns = 100000; /* 100 usecs */ - static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */ - static int min_wakeup_granularity_ns; /* 0 usecs */ -@@ -317,6 +325,7 @@ static int max_extfrag_threshold = 1000; - #endif - - static struct ctl_table kern_table[] = { -+#ifndef CONFIG_SCHED_MUQSS - { - .procname = "sched_child_runs_first", - .data = &sysctl_sched_child_runs_first, -@@ -498,6 +507,7 @@ static struct ctl_table kern_table[] = { - .extra2 = SYSCTL_ONE, - }, - #endif -+#endif /* !CONFIG_SCHED_MUQSS */ - #ifdef 
CONFIG_PROVE_LOCKING - { - .procname = "prove_locking", -@@ -1070,6 +1080,44 @@ static struct ctl_table kern_table[] = { - .proc_handler = proc_dointvec, - }, - #endif -+#ifdef CONFIG_SCHED_MUQSS -+ { -+ .procname = "rr_interval", -+ .data = &rr_interval, -+ .maxlen = sizeof (int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec_minmax, -+ .extra1 = &one, -+ .extra2 = &one_thousand, -+ }, -+ { -+ .procname = "interactive", -+ .data = &sched_interactive, -+ .maxlen = sizeof(int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec_minmax, -+ .extra1 = &zero, -+ .extra2 = &one, -+ }, -+ { -+ .procname = "iso_cpu", -+ .data = &sched_iso_cpu, -+ .maxlen = sizeof (int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec_minmax, -+ .extra1 = &zero, -+ .extra2 = &one_hundred, -+ }, -+ { -+ .procname = "yield_type", -+ .data = &sched_yield_type, -+ .maxlen = sizeof (int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec_minmax, -+ .extra1 = &zero, -+ .extra2 = &two, -+ }, -+#endif - #if defined(CONFIG_S390) && defined(CONFIG_SMP) - { - .procname = "spin_retry", -diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c -index f5490222e134..7a61971cca74 100644 ---- a/kernel/time/clockevents.c -+++ b/kernel/time/clockevents.c -@@ -190,8 +190,13 @@ int clockevents_tick_resume(struct clock_event_device *dev) - - #ifdef CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST - -+#ifdef CONFIG_SCHED_MUQSS -+/* Limit min_delta to 100us */ -+#define MIN_DELTA_LIMIT (NSEC_PER_SEC / 10000) -+#else - /* Limit min_delta to a jiffie */ - #define MIN_DELTA_LIMIT (NSEC_PER_SEC / HZ) -+#endif - - /** - * clockevents_increase_min_delta - raise minimum delta of a clock event device -diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c -index 42d512fcfda2..0db83bdf7f39 100644 ---- a/kernel/time/posix-cpu-timers.c -+++ b/kernel/time/posix-cpu-timers.c -@@ -226,7 +226,7 @@ static void task_sample_cputime(struct task_struct *p, u64 *samples) - u64 stime, utime; - - task_cputime(p, &utime, &stime); -- store_samples(samples, stime, utime, p->se.sum_exec_runtime); -+ store_samples(samples, stime, utime, tsk_seruntime(p)); - } - - static void proc_sample_cputime_atomic(struct task_cputime_atomic *at, -@@ -845,7 +845,7 @@ static void check_thread_timers(struct task_struct *tsk, - soft = task_rlimit(tsk, RLIMIT_RTTIME); - if (soft != RLIM_INFINITY) { - /* Task RT timeout is accounted in jiffies. RTTIME is usec */ -- unsigned long rttime = tsk->rt.timeout * (USEC_PER_SEC / HZ); -+ unsigned long rttime = tsk_rttimeout(tsk) * (USEC_PER_SEC / HZ); - unsigned long hard = task_rlimit_max(tsk, RLIMIT_RTTIME); - - /* At the hard limit, send SIGKILL. No further action. */ -diff --git a/kernel/time/timer.c b/kernel/time/timer.c -index 4820823515e9..7dcadf9cd865 100644 ---- a/kernel/time/timer.c -+++ b/kernel/time/timer.c -@@ -1567,7 +1567,7 @@ static unsigned long __next_timer_interrupt(struct timer_base *base) - * Check, if the next hrtimer event is before the next timer wheel - * event: - */ --static u64 cmp_next_hrtimer_event(u64 basem, u64 expires) -+static u64 cmp_next_hrtimer_event(struct timer_base *base, u64 basem, u64 expires) - { - u64 nextevt = hrtimer_get_next_event(); - -@@ -1585,6 +1585,9 @@ static u64 cmp_next_hrtimer_event(u64 basem, u64 expires) - if (nextevt <= basem) - return basem; - -+ if (nextevt < expires && nextevt - basem <= TICK_NSEC) -+ base->is_idle = false; -+ - /* - * Round up to the next jiffie. 
High resolution timers are - * off, so the hrtimers are expired in the tick and we need to -@@ -1654,7 +1657,7 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) - } - raw_spin_unlock(&base->lock); - -- return cmp_next_hrtimer_event(basem, expires); -+ return cmp_next_hrtimer_event(base, basem, expires); - } - - /** -diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c -index 69ee8ef12cee..6edb01f2fd81 100644 ---- a/kernel/trace/trace_selftest.c -+++ b/kernel/trace/trace_selftest.c -@@ -1048,10 +1048,15 @@ static int trace_wakeup_test_thread(void *data) - { - /* Make this a -deadline thread */ - static const struct sched_attr attr = { -+#ifdef CONFIG_SCHED_MUQSS -+ /* No deadline on MuQSS, use RR */ -+ .sched_policy = SCHED_RR, -+#else - .sched_policy = SCHED_DEADLINE, - .sched_runtime = 100000ULL, - .sched_deadline = 10000000ULL, - .sched_period = 10000000ULL -+#endif - }; - struct wakeup_test_data *x = data; - --- -2.20.1 - diff --git a/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/4502_futex-wait-multiple.patch b/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/4502_futex-wait-multiple.patch deleted file mode 100644 index a7860f473..000000000 --- a/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/4502_futex-wait-multiple.patch +++ /dev/null @@ -1,326 +0,0 @@ -# Calculate format=diff merge(sys-kernel/calculate-sources[fsync])!= - -Squashed futex-wait-multiple patchset onto stable release v5.2.1 -https://gitlab.collabora.com/krisman/linux/commits/futex-wait-multiple-master - -diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h -index a89eb0accd5e..c34e52e0f787 100644 ---- a/include/uapi/linux/futex.h -+++ b/include/uapi/linux/futex.h -@@ -21,6 +21,7 @@ - #define FUTEX_WAKE_BITSET 10 - #define FUTEX_WAIT_REQUEUE_PI 11 - #define FUTEX_CMP_REQUEUE_PI 12 -+#define FUTEX_WAIT_MULTIPLE 31 - - #define FUTEX_PRIVATE_FLAG 128 - #define FUTEX_CLOCK_REALTIME 256 -@@ -150,4 +151,10 @@ struct robust_list_head { - (((op & 0xf) << 28) | ((cmp & 0xf) << 24) \ - | ((oparg & 0xfff) << 12) | (cmparg & 0xfff)) - -+struct futex_wait_block { -+ __u32 __user *uaddr; -+ __u32 val; -+ __u32 bitset; -+}; -+ - #endif /* _UAPI_LINUX_FUTEX_H */ -diff --git a/kernel/futex.c b/kernel/futex.c -index 6d50728ef2e7..338ae60bd86c 100644 ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -183,6 +183,7 @@ static int __read_mostly futex_cmpxchg_enabled; - #endif - #define FLAGS_CLOCKRT 0x02 - #define FLAGS_HAS_TIMEOUT 0x04 -+#define FLAGS_WAKE_MULTIPLE 0x08 - - /* - * Priority Inheritance state: -@@ -2631,6 +2632,39 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, - __set_current_state(TASK_RUNNING); - } - -+static int __futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, -+ struct futex_q *q, struct futex_hash_bucket **hb) -+{ -+ -+ u32 uval; -+ int ret; -+ -+retry_private: -+ *hb = queue_lock(q); -+ -+ ret = get_futex_value_locked(&uval, uaddr); -+ -+ if (ret) { -+ queue_unlock(*hb); -+ -+ ret = get_user(uval, uaddr); -+ if (ret) -+ return ret; -+ -+ if (!(flags & FLAGS_SHARED)) -+ goto retry_private; -+ -+ return 1; -+ } -+ -+ if (uval != val) { -+ queue_unlock(*hb); -+ ret = -EWOULDBLOCK; -+ } -+ -+ return ret; -+} -+ - /** - * futex_wait_setup() - Prepare to wait on a futex - * @uaddr: the futex userspace address -@@ -2651,7 +2685,6 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, - static int 
futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, - struct futex_q *q, struct futex_hash_bucket **hb) - { -- u32 uval; - int ret; - - /* -@@ -2672,38 +2705,161 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, - * absorb a wakeup if *uaddr does not match the desired values - * while the syscall executes. - */ --retry: -- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, FUTEX_READ); -- if (unlikely(ret != 0)) -- return ret; -+ do { -+ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, -+ &q->key, FUTEX_READ); -+ if (unlikely(ret != 0)) -+ return ret; - --retry_private: -- *hb = queue_lock(q); -+ ret = __futex_wait_setup(uaddr, val, flags, q, hb); - -- ret = get_futex_value_locked(&uval, uaddr); -+ /* Drop key reference if retry or error. */ -+ if (ret) -+ put_futex_key(&q->key); -+ } while (ret > 0); - -- if (ret) { -- queue_unlock(*hb); -+ return ret; -+} - -- ret = get_user(uval, uaddr); -- if (ret) -+static int do_futex_wait_multiple(struct futex_wait_block *wb, -+ u32 count, unsigned int flags, -+ ktime_t *abs_time) -+{ -+ -+ struct hrtimer_sleeper timeout, *to; -+ struct futex_hash_bucket *hb; -+ struct futex_q *qs = NULL; -+ int ret; -+ int i; -+ -+ qs = kcalloc(count, sizeof(struct futex_q), GFP_KERNEL); -+ if (!qs) -+ return -ENOMEM; -+ -+ to = futex_setup_timer(abs_time, &timeout, flags, -+ current->timer_slack_ns); -+ retry: -+ for (i = 0; i < count; i++) { -+ qs[i].key = FUTEX_KEY_INIT; -+ qs[i].bitset = wb[i].bitset; -+ -+ ret = get_futex_key(wb[i].uaddr, flags & FLAGS_SHARED, -+ &qs[i].key, FUTEX_READ); -+ if (unlikely(ret != 0)) { -+ for (--i; i >= 0; i--) -+ put_futex_key(&qs[i].key); - goto out; -+ } -+ } - -- if (!(flags & FLAGS_SHARED)) -- goto retry_private; -+ set_current_state(TASK_INTERRUPTIBLE); -+ -+ for (i = 0; i < count; i++) { -+ ret = __futex_wait_setup(wb[i].uaddr, wb[i].val, -+ flags, &qs[i], &hb); -+ if (ret) { -+ /* Drop the failed key directly. keys 0..(i-1) -+ * will be put by unqueue_me. */ -+ put_futex_key(&qs[i].key); -+ -+ /* Undo the partial work we did. */ -+ for (--i; i >= 0; i--) -+ unqueue_me(&qs[i]); -+ -+ __set_current_state(TASK_RUNNING); -+ if (ret > 0) -+ goto retry; -+ goto out; -+ } -+ -+ /* We can't hold to the bucket lock when dealing with -+ * the next futex. Queue ourselves now so we can unlock -+ * it before moving on. */ -+ queue_me(&qs[i], hb); -+ } -+ -+ if (to) -+ hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS); -+ -+ /* There is no easy to way to check if we are wake already on -+ * multiple futexes without waking through each one of them. So -+ * just sleep and let the scheduler handle it. -+ */ -+ if (!to || to->task) -+ freezable_schedule(); -+ -+ __set_current_state(TASK_RUNNING); - -- put_futex_key(&q->key); -+ ret = -ETIMEDOUT; -+ /* If we were woken (and unqueued), we succeeded. */ -+ for (i = 0; i < count; i++) -+ if (!unqueue_me(&qs[i])) -+ ret = i; -+ -+ /* Succeed wakeup */ -+ if (ret >= 0) -+ goto out; -+ -+ /* Woken by triggered timeout */ -+ if (to && !to->task) -+ goto out; -+ -+ /* -+ * We expect signal_pending(current), but we might be the -+ * victim of a spurious wakeup as well. 
-+ */ -+ if (!signal_pending(current)) - goto retry; -+ -+ ret = -ERESTARTSYS; -+ if (!abs_time) -+ goto out; -+ -+ ret = -ERESTART_RESTARTBLOCK; -+ out: -+ if (to) { -+ hrtimer_cancel(&to->timer); -+ destroy_hrtimer_on_stack(&to->timer); - } - -- if (uval != val) { -- queue_unlock(*hb); -- ret = -EWOULDBLOCK; -+ kfree(qs); -+ return ret; -+} -+ -+static int futex_wait_multiple(u32 __user *uaddr, unsigned int flags, -+ u32 count, ktime_t *abs_time) -+{ -+ struct futex_wait_block *wb; -+ struct restart_block *restart; -+ int ret; -+ -+ if (!count) -+ return -EINVAL; -+ -+ wb = kcalloc(count, sizeof(struct futex_wait_block), GFP_KERNEL); -+ if (!wb) -+ return -ENOMEM; -+ -+ if (copy_from_user(wb, uaddr, -+ count * sizeof(struct futex_wait_block))) { -+ ret = -EFAULT; -+ goto out; -+ } -+ -+ ret = do_futex_wait_multiple(wb, count, flags, abs_time); -+ -+ if (ret == -ERESTART_RESTARTBLOCK) { -+ restart = ¤t->restart_block; -+ restart->fn = futex_wait_restart; -+ restart->futex.uaddr = uaddr; -+ restart->futex.val = count; -+ restart->futex.time = *abs_time; -+ restart->futex.flags = (flags | FLAGS_HAS_TIMEOUT | -+ FLAGS_WAKE_MULTIPLE); - } - - out: -- if (ret) -- put_futex_key(&q->key); -+ kfree(wb); - return ret; - } - -@@ -2784,6 +2940,10 @@ static long futex_wait_restart(struct restart_block *restart) - } - restart->fn = do_no_restart_syscall; - -+ if (restart->futex.flags & FLAGS_WAKE_MULTIPLE) -+ return (long)futex_wait_multiple(uaddr, restart->futex.flags, -+ restart->futex.val, tp); -+ - return (long)futex_wait(uaddr, restart->futex.flags, - restart->futex.val, tp, restart->futex.bitset); - } -@@ -3667,6 +3827,8 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, - uaddr2); - case FUTEX_CMP_REQUEUE_PI: - return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1); -+ case FUTEX_WAIT_MULTIPLE: -+ return futex_wait_multiple(uaddr, flags, val, timeout); - } - return -ENOSYS; - } -@@ -3683,7 +3845,8 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, - - if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || - cmd == FUTEX_WAIT_BITSET || -- cmd == FUTEX_WAIT_REQUEUE_PI)) { -+ cmd == FUTEX_WAIT_REQUEUE_PI || -+ cmd == FUTEX_WAIT_MULTIPLE)) { - if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG)))) - return -EFAULT; - if (get_timespec64(&ts, utime)) -@@ -3692,7 +3855,7 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, - return -EINVAL; - - t = timespec64_to_ktime(ts); -- if (cmd == FUTEX_WAIT) -+ if (cmd == FUTEX_WAIT || cmd == FUTEX_WAIT_MULTIPLE) - t = ktime_add_safe(ktime_get(), t); - tp = &t; - } -@@ -3876,14 +4039,15 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, - - if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || - cmd == FUTEX_WAIT_BITSET || -- cmd == FUTEX_WAIT_REQUEUE_PI)) { -+ cmd == FUTEX_WAIT_REQUEUE_PI || -+ cmd == FUTEX_WAIT_MULTIPLE)) { - if (get_old_timespec32(&ts, utime)) - return -EFAULT; - if (!timespec64_valid(&ts)) - return -EINVAL; - - t = timespec64_to_ktime(ts); -- if (cmd == FUTEX_WAIT) -+ if (cmd == FUTEX_WAIT || cmd == FUTEX_WAIT_MULTIPLE) - t = ktime_add_safe(ktime_get(), t); - tp = &t; - } diff --git a/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/4503_zstd-patches.patch b/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/4503_zstd-patches.patch deleted file mode 100644 index 665e17a28..000000000 --- a/profiles/templates/3.6/6_ac_install_patch/sys-kernel/calculate-sources/5.4/4503_zstd-patches.patch +++ /dev/null 
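The futex-wait-multiple patch removed above adds a multiplexed wait operation but no example of how userspace drives it. The sketch below is one plausible caller, assuming the out-of-tree interface exactly as defined in that patch: the opcode value 31 and the layout of struct futex_wait_block are copied from it, everything else is illustrative, and on an unpatched kernel the call simply fails with ENOSYS.

#include <linux/futex.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <time.h>
#include <unistd.h>

#ifndef FUTEX_WAIT_MULTIPLE
#define FUTEX_WAIT_MULTIPLE 31            /* value taken from the patch above */
#endif

/* Mirrors the uapi struct added by the patch; declared locally because stock
 * headers do not ship it. */
struct futex_wait_block {
	uint32_t *uaddr;
	uint32_t val;
	uint32_t bitset;
};

static uint32_t word_a, word_b;           /* two futex words, both currently 0 */

int main(void)
{
	struct futex_wait_block wb[2] = {
		{ &word_a, 0, ~0u },      /* sleep while *uaddr == val */
		{ &word_b, 0, ~0u },
	};
	struct timespec ts = { .tv_sec = 1 };  /* relative timeout, as with FUTEX_WAIT */
	long ret;

	/* uaddr points at the block array, val carries the element count. */
	ret = syscall(SYS_futex, wb, FUTEX_WAIT_MULTIPLE | FUTEX_PRIVATE_FLAG,
		      2, &ts, NULL, 0);

	/* On a patched kernel: >= 0 is the index of the futex that was woken,
	 * otherwise -1 with errno set (ETIMEDOUT here, since nothing wakes us). */
	printf("futex returned %ld\n", ret);
	return 0;
}

This opcode never became kernel ABI; mainline later adopted a different interface (futex_waitv), which is why the operation only exists on kernels carrying this patch.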
@@ -1,1134 +0,0 @@ -# Calculate format=diff -From 9d55c70c87606f9632da8bf9fdcf0f633f165f2f Mon Sep 17 00:00:00 2001 -From: Nick Terrell -Date: Tue, 31 Mar 2020 22:39:06 -0700 -Subject: [PATCH 1/9] lib: prepare zstd for preboot environment - -* Don't export symbols if ZSTD_PREBOOT is defined. -* Remove a double definition of the CHECK_F macro when the zstd - library is amalgamated. -* Switch ZSTD_copy8() to __builtin_memcpy(), because in the preboot - environment on x86 gcc can't inline `memcpy()` otherwise. -* Limit the gcc hack in ZSTD_wildcopy() to the broken gcc version. See - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81388. - -These changes are necessary to get the build to work in the preboot -environment, and to get reasonable performance. ZSTD_copy8() and -ZSTD_wildcopy() are in the core of the zstd hot loop. So outlining -these calls to memcpy(), and having an extra branch are very -detrimental to performance. - -Reviewed-by: Kees Cook -Tested-by: Sedat Dilek -Signed-off-by: Nick Terrell ---- - lib/zstd/decompress.c | 2 ++ - lib/zstd/fse_decompress.c | 9 +-------- - lib/zstd/zstd_internal.h | 14 ++++++++++++-- - 3 files changed, 15 insertions(+), 10 deletions(-) - -diff --git a/lib/zstd/decompress.c b/lib/zstd/decompress.c -index 269ee9a79..73ded6327 100644 ---- a/lib/zstd/decompress.c -+++ b/lib/zstd/decompress.c -@@ -2490,6 +2490,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream *zds, ZSTD_outBuffer *output, ZSTD_inB - } - } - -+#ifndef ZSTD_PREBOOT - EXPORT_SYMBOL(ZSTD_DCtxWorkspaceBound); - EXPORT_SYMBOL(ZSTD_initDCtx); - EXPORT_SYMBOL(ZSTD_decompressDCtx); -@@ -2529,3 +2530,4 @@ EXPORT_SYMBOL(ZSTD_insertBlock); - - MODULE_LICENSE("Dual BSD/GPL"); - MODULE_DESCRIPTION("Zstd Decompressor"); -+#endif -diff --git a/lib/zstd/fse_decompress.c b/lib/zstd/fse_decompress.c -index a84300e5a..0b353530f 100644 ---- a/lib/zstd/fse_decompress.c -+++ b/lib/zstd/fse_decompress.c -@@ -47,6 +47,7 @@ - ****************************************************************/ - #include "bitstream.h" - #include "fse.h" -+#include "zstd_internal.h" - #include - #include - #include /* memcpy, memset */ -@@ -60,14 +61,6 @@ - enum { FSE_static_assert = 1 / (int)(!!(c)) }; \ - } /* use only *after* variable declarations */ - --/* check and forward error code */ --#define CHECK_F(f) \ -- { \ -- size_t const e = f; \ -- if (FSE_isError(e)) \ -- return e; \ -- } -- - /* ************************************************************** - * Templates - ****************************************************************/ -diff --git a/lib/zstd/zstd_internal.h b/lib/zstd/zstd_internal.h -index 1a79fab9e..dac753397 100644 ---- a/lib/zstd/zstd_internal.h -+++ b/lib/zstd/zstd_internal.h -@@ -127,7 +127,14 @@ static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG; - * Shared functions to include for inlining - *********************************************/ - ZSTD_STATIC void ZSTD_copy8(void *dst, const void *src) { -- memcpy(dst, src, 8); -+ /* -+ * zstd relies heavily on gcc being able to analyze and inline this -+ * memcpy() call, since it is called in a tight loop. Preboot mode -+ * is compiled in freestanding mode, which stops gcc from analyzing -+ * memcpy(). Use __builtin_memcpy() to tell gcc to analyze this as a -+ * regular memcpy(). -+ */ -+ __builtin_memcpy(dst, src, 8); - } - /*! 
ZSTD_wildcopy() : - * custom version of memcpy(), can copy up to 7 bytes too many (8 bytes if length==0) */ -@@ -137,13 +144,16 @@ ZSTD_STATIC void ZSTD_wildcopy(void *dst, const void *src, ptrdiff_t length) - const BYTE* ip = (const BYTE*)src; - BYTE* op = (BYTE*)dst; - BYTE* const oend = op + length; -- /* Work around https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81388. -+#if defined(GCC_VERSION) && GCC_VERSION >= 70000 && GCC_VERSION < 70200 -+ /* -+ * Work around https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81388. - * Avoid the bad case where the loop only runs once by handling the - * special case separately. This doesn't trigger the bug because it - * doesn't involve pointer/integer overflow. - */ - if (length <= 8) - return ZSTD_copy8(dst, src); -+#endif - do { - ZSTD_copy8(op, ip); - op += 8; --- -2.27.0.rc0 - - -From f2f7175166743fee3fe359a487f341dfb2f1ee50 Mon Sep 17 00:00:00 2001 -From: Nick Terrell -Date: Tue, 31 Mar 2020 22:39:07 -0700 -Subject: [PATCH 2/9] lib: prepare xxhash for preboot environment - -Don't export symbols if XXH_PREBOOT is defined. - -This change is necessary to get xxhash to work in a preboot environment, -which is needed to support zstd-compressed kernels. - -Reviewed-by: Kees Cook -Tested-by: Sedat Dilek -Signed-off-by: Nick Terrell ---- - lib/xxhash.c | 21 ++++++++++++--------- - 1 file changed, 12 insertions(+), 9 deletions(-) - -diff --git a/lib/xxhash.c b/lib/xxhash.c -index aa61e2a38..b4364e011 100644 ---- a/lib/xxhash.c -+++ b/lib/xxhash.c -@@ -80,13 +80,11 @@ void xxh32_copy_state(struct xxh32_state *dst, const struct xxh32_state *src) - { - memcpy(dst, src, sizeof(*dst)); - } --EXPORT_SYMBOL(xxh32_copy_state); - - void xxh64_copy_state(struct xxh64_state *dst, const struct xxh64_state *src) - { - memcpy(dst, src, sizeof(*dst)); - } --EXPORT_SYMBOL(xxh64_copy_state); - - /*-*************************** - * Simple Hash Functions -@@ -151,7 +149,6 @@ uint32_t xxh32(const void *input, const size_t len, const uint32_t seed) - - return h32; - } --EXPORT_SYMBOL(xxh32); - - static uint64_t xxh64_round(uint64_t acc, const uint64_t input) - { -@@ -234,7 +231,6 @@ uint64_t xxh64(const void *input, const size_t len, const uint64_t seed) - - return h64; - } --EXPORT_SYMBOL(xxh64); - - /*-************************************************** - * Advanced Hash Functions -@@ -251,7 +247,6 @@ void xxh32_reset(struct xxh32_state *statePtr, const uint32_t seed) - state.v4 = seed - PRIME32_1; - memcpy(statePtr, &state, sizeof(state)); - } --EXPORT_SYMBOL(xxh32_reset); - - void xxh64_reset(struct xxh64_state *statePtr, const uint64_t seed) - { -@@ -265,7 +260,6 @@ void xxh64_reset(struct xxh64_state *statePtr, const uint64_t seed) - state.v4 = seed - PRIME64_1; - memcpy(statePtr, &state, sizeof(state)); - } --EXPORT_SYMBOL(xxh64_reset); - - int xxh32_update(struct xxh32_state *state, const void *input, const size_t len) - { -@@ -334,7 +328,6 @@ int xxh32_update(struct xxh32_state *state, const void *input, const size_t len) - - return 0; - } --EXPORT_SYMBOL(xxh32_update); - - uint32_t xxh32_digest(const struct xxh32_state *state) - { -@@ -372,7 +365,6 @@ uint32_t xxh32_digest(const struct xxh32_state *state) - - return h32; - } --EXPORT_SYMBOL(xxh32_digest); - - int xxh64_update(struct xxh64_state *state, const void *input, const size_t len) - { -@@ -439,7 +431,6 @@ int xxh64_update(struct xxh64_state *state, const void *input, const size_t len) - - return 0; - } --EXPORT_SYMBOL(xxh64_update); - - uint64_t xxh64_digest(const struct xxh64_state *state) - { -@@ -494,7 
+485,19 @@ uint64_t xxh64_digest(const struct xxh64_state *state) - - return h64; - } -+ -+#ifndef XXH_PREBOOT -+EXPORT_SYMBOL(xxh32_copy_state); -+EXPORT_SYMBOL(xxh64_copy_state); -+EXPORT_SYMBOL(xxh32); -+EXPORT_SYMBOL(xxh64); -+EXPORT_SYMBOL(xxh32_reset); -+EXPORT_SYMBOL(xxh64_reset); -+EXPORT_SYMBOL(xxh32_update); -+EXPORT_SYMBOL(xxh32_digest); -+EXPORT_SYMBOL(xxh64_update); - EXPORT_SYMBOL(xxh64_digest); - - MODULE_LICENSE("Dual BSD/GPL"); - MODULE_DESCRIPTION("xxHash"); -+#endif --- -2.27.0.rc0 - - -From 2800faddf67a8fabbb4532cb520b84acaadd054e Mon Sep 17 00:00:00 2001 -From: Nick Terrell -Date: Tue, 31 Mar 2020 22:39:08 -0700 -Subject: [PATCH 3/9] lib: add zstd support to decompress - -* Add unzstd() and the zstd decompress interface. -* Add zstd support to decompress_method(). - -The decompress_method() and unzstd() functions are used to decompress -the initramfs and the initrd. The __decompress() function is used in -the preboot environment to decompress a zstd compressed kernel. - -The zstd decompression function allows the input and output buffers to -overlap because that is used by x86 kernel decompression. - -Reviewed-by: Kees Cook -Tested-by: Sedat Dilek -Signed-off-by: Nick Terrell ---- - include/linux/decompress/unzstd.h | 11 + - lib/Kconfig | 4 + - lib/Makefile | 1 + - lib/decompress.c | 5 + - lib/decompress_unzstd.c | 342 ++++++++++++++++++++++++++++++ - 5 files changed, 363 insertions(+) - create mode 100644 include/linux/decompress/unzstd.h - create mode 100644 lib/decompress_unzstd.c - -diff --git a/include/linux/decompress/unzstd.h b/include/linux/decompress/unzstd.h -new file mode 100644 -index 000000000..56d539ae8 ---- /dev/null -+++ b/include/linux/decompress/unzstd.h -@@ -0,0 +1,11 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef LINUX_DECOMPRESS_UNZSTD_H -+#define LINUX_DECOMPRESS_UNZSTD_H -+ -+int unzstd(unsigned char *inbuf, long len, -+ long (*fill)(void*, unsigned long), -+ long (*flush)(void*, unsigned long), -+ unsigned char *output, -+ long *pos, -+ void (*error_fn)(char *x)); -+#endif -diff --git a/lib/Kconfig b/lib/Kconfig -index bc7e56370..11de5fa09 100644 ---- a/lib/Kconfig -+++ b/lib/Kconfig -@@ -336,6 +336,10 @@ config DECOMPRESS_LZ4 - select LZ4_DECOMPRESS - tristate - -+config DECOMPRESS_ZSTD -+ select ZSTD_DECOMPRESS -+ tristate -+ - # - # Generic allocator support is selected if needed - # -diff --git a/lib/Makefile b/lib/Makefile -index 611872c06..09ad45ba6 100644 ---- a/lib/Makefile -+++ b/lib/Makefile -@@ -160,6 +160,7 @@ lib-$(CONFIG_DECOMPRESS_LZMA) += decompress_unlzma.o - lib-$(CONFIG_DECOMPRESS_XZ) += decompress_unxz.o - lib-$(CONFIG_DECOMPRESS_LZO) += decompress_unlzo.o - lib-$(CONFIG_DECOMPRESS_LZ4) += decompress_unlz4.o -+lib-$(CONFIG_DECOMPRESS_ZSTD) += decompress_unzstd.o - - obj-$(CONFIG_TEXTSEARCH) += textsearch.o - obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o -diff --git a/lib/decompress.c b/lib/decompress.c -index 857ab1af1..ab3fc90ff 100644 ---- a/lib/decompress.c -+++ b/lib/decompress.c -@@ -13,6 +13,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -37,6 +38,9 @@ - #ifndef CONFIG_DECOMPRESS_LZ4 - # define unlz4 NULL - #endif -+#ifndef CONFIG_DECOMPRESS_ZSTD -+# define unzstd NULL -+#endif - - struct compress_format { - unsigned char magic[2]; -@@ -52,6 +56,7 @@ static const struct compress_format compressed_formats[] __initconst = { - { {0xfd, 0x37}, "xz", unxz }, - { {0x89, 0x4c}, "lzo", unlzo }, - { {0x02, 0x21}, "lz4", unlz4 }, -+ { {0x28, 0xb5}, "zstd", unzstd }, - { {0, 0}, NULL, NULL } - }; - 
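The lib/decompress.c hunk above registers unzstd() in the magic-byte table the kernel scans to pick a decompressor for the initramfs/initrd. The sketch below shows that two-byte dispatch in isolation; 0x28 0xb5 are the first two bytes of the little-endian zstd frame magic 0xFD2FB528, and all names here are invented for illustration.

#include <stddef.h>
#include <stdio.h>

struct demo_format {
	unsigned char magic[2];
	const char *name;
};

static const struct demo_format demo_formats[] = {
	{ {0x1f, 0x8b}, "gzip" },
	{ {0xfd, 0x37}, "xz"   },
	{ {0x28, 0xb5}, "zstd" },
	{ {0,    0   }, NULL   },     /* sentinel */
};

/* Return the name of the matching format, or NULL if none matches. */
static const char *demo_detect(const unsigned char *buf, size_t len)
{
	const struct demo_format *f;

	if (len < 2)
		return NULL;
	for (f = demo_formats; f->name; f++)
		if (buf[0] == f->magic[0] && buf[1] == f->magic[1])
			return f->name;
	return NULL;
}

int main(void)
{
	const unsigned char zstd_hdr[] = { 0x28, 0xb5, 0x2f, 0xfd };

	printf("detected: %s\n", demo_detect(zstd_hdr, sizeof(zstd_hdr)));
	return 0;
}

The table match is deliberately cheap; the full frame magic is validated later by the zstd decoder itself, which is what the ZSTD_error_prefix_unknown case in decompress_unzstd.c reports.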
-diff --git a/lib/decompress_unzstd.c b/lib/decompress_unzstd.c -new file mode 100644 -index 000000000..f317afab5 ---- /dev/null -+++ b/lib/decompress_unzstd.c -@@ -0,0 +1,342 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+/* -+ * Important notes about in-place decompression -+ * -+ * At least on x86, the kernel is decompressed in place: the compressed data -+ * is placed to the end of the output buffer, and the decompressor overwrites -+ * most of the compressed data. There must be enough safety margin to -+ * guarantee that the write position is always behind the read position. -+ * -+ * The safety margin for ZSTD with a 128 KB block size is calculated below. -+ * Note that the margin with ZSTD is bigger than with GZIP or XZ! -+ * -+ * The worst case for in-place decompression is that the beginning of -+ * the file is compressed extremely well, and the rest of the file is -+ * uncompressible. Thus, we must look for worst-case expansion when the -+ * compressor is encoding uncompressible data. -+ * -+ * The structure of the .zst file in case of a compresed kernel is as follows. -+ * Maximum sizes (as bytes) of the fields are in parenthesis. -+ * -+ * Frame Header: (18) -+ * Blocks: (N) -+ * Checksum: (4) -+ * -+ * The frame header and checksum overhead is at most 22 bytes. -+ * -+ * ZSTD stores the data in blocks. Each block has a header whose size is -+ * a 3 bytes. After the block header, there is up to 128 KB of payload. -+ * The maximum uncompressed size of the payload is 128 KB. The minimum -+ * uncompressed size of the payload is never less than the payload size -+ * (excluding the block header). -+ * -+ * The assumption, that the uncompressed size of the payload is never -+ * smaller than the payload itself, is valid only when talking about -+ * the payload as a whole. It is possible that the payload has parts where -+ * the decompressor consumes more input than it produces output. Calculating -+ * the worst case for this would be tricky. Instead of trying to do that, -+ * let's simply make sure that the decompressor never overwrites any bytes -+ * of the payload which it is currently reading. -+ * -+ * Now we have enough information to calculate the safety margin. We need -+ * - 22 bytes for the .zst file format headers; -+ * - 3 bytes per every 128 KiB of uncompressed size (one block header per -+ * block); and -+ * - 128 KiB (biggest possible zstd block size) to make sure that the -+ * decompressor never overwrites anything from the block it is currently -+ * reading. -+ * -+ * We get the following formula: -+ * -+ * safety_margin = 22 + uncompressed_size * 3 / 131072 + 131072 -+ * <= 22 + (uncompressed_size >> 15) + 131072 -+ */ -+ -+/* -+ * Preboot environments #include "path/to/decompress_unzstd.c". -+ * All of the source files we depend on must be #included. -+ * zstd's only source dependeny is xxhash, which has no source -+ * dependencies. -+ * -+ * zstd and xxhash avoid declaring themselves as modules -+ * when ZSTD_PREBOOT and XXH_PREBOOT are defined. -+ */ -+#ifdef STATIC -+# define ZSTD_PREBOOT -+# define XXH_PREBOOT -+# include "xxhash.c" -+# include "zstd/entropy_common.c" -+# include "zstd/fse_decompress.c" -+# include "zstd/huf_decompress.c" -+# include "zstd/zstd_common.c" -+# include "zstd/decompress.c" -+#endif -+ -+#include -+#include -+#include -+ -+/* 128MB is the maximum window size supported by zstd. */ -+#define ZSTD_WINDOWSIZE_MAX (1 << ZSTD_WINDOWLOG_MAX) -+/* Size of the input and output buffers in multi-call mode. 
-+ * Pick a larger size because it isn't used during kernel decompression, -+ * since that is single pass, and we have to allocate a large buffer for -+ * zstd's window anyways. The larger size speeds up initramfs decompression. -+ */ -+#define ZSTD_IOBUF_SIZE (1 << 17) -+ -+static int INIT handle_zstd_error(size_t ret, void (*error)(char *x)) -+{ -+ const int err = ZSTD_getErrorCode(ret); -+ -+ if (!ZSTD_isError(ret)) -+ return 0; -+ -+ switch (err) { -+ case ZSTD_error_memory_allocation: -+ error("ZSTD decompressor ran out of memory"); -+ break; -+ case ZSTD_error_prefix_unknown: -+ error("Input is not in the ZSTD format (wrong magic bytes)"); -+ break; -+ case ZSTD_error_dstSize_tooSmall: -+ case ZSTD_error_corruption_detected: -+ case ZSTD_error_checksum_wrong: -+ error("ZSTD-compressed data is corrupt"); -+ break; -+ default: -+ error("ZSTD-compressed data is probably corrupt"); -+ break; -+ } -+ return -1; -+} -+ -+/* -+ * Handle the case where we have the entire input and output in one segment. -+ * We can allocate less memory (no circular buffer for the sliding window), -+ * and avoid some memcpy() calls. -+ */ -+static int INIT decompress_single(const u8 *in_buf, long in_len, u8 *out_buf, -+ long out_len, long *in_pos, -+ void (*error)(char *x)) -+{ -+ const size_t wksp_size = ZSTD_DCtxWorkspaceBound(); -+ void *wksp = large_malloc(wksp_size); -+ ZSTD_DCtx *dctx = ZSTD_initDCtx(wksp, wksp_size); -+ int err; -+ size_t ret; -+ -+ if (dctx == NULL) { -+ error("Out of memory while allocating ZSTD_DCtx"); -+ err = -1; -+ goto out; -+ } -+ /* -+ * Find out how large the frame actually is, there may be junk at -+ * the end of the frame that ZSTD_decompressDCtx() can't handle. -+ */ -+ ret = ZSTD_findFrameCompressedSize(in_buf, in_len); -+ err = handle_zstd_error(ret, error); -+ if (err) -+ goto out; -+ in_len = (long)ret; -+ -+ ret = ZSTD_decompressDCtx(dctx, out_buf, out_len, in_buf, in_len); -+ err = handle_zstd_error(ret, error); -+ if (err) -+ goto out; -+ -+ if (in_pos != NULL) -+ *in_pos = in_len; -+ -+ err = 0; -+out: -+ if (wksp != NULL) -+ large_free(wksp); -+ return err; -+} -+ -+static int INIT __unzstd(unsigned char *in_buf, long in_len, -+ long (*fill)(void*, unsigned long), -+ long (*flush)(void*, unsigned long), -+ unsigned char *out_buf, long out_len, -+ long *in_pos, -+ void (*error)(char *x)) -+{ -+ ZSTD_inBuffer in; -+ ZSTD_outBuffer out; -+ ZSTD_frameParams params; -+ void *in_allocated = NULL; -+ void *out_allocated = NULL; -+ void *wksp = NULL; -+ size_t wksp_size; -+ ZSTD_DStream *dstream; -+ int err; -+ size_t ret; -+ -+ if (out_len == 0) -+ out_len = LONG_MAX; /* no limit */ -+ -+ if (fill == NULL && flush == NULL) -+ /* -+ * We can decompress faster and with less memory when we have a -+ * single chunk. -+ */ -+ return decompress_single(in_buf, in_len, out_buf, out_len, -+ in_pos, error); -+ -+ /* -+ * If in_buf is not provided, we must be using fill(), so allocate -+ * a large enough buffer. If it is provided, it must be at least -+ * ZSTD_IOBUF_SIZE large. -+ */ -+ if (in_buf == NULL) { -+ in_allocated = large_malloc(ZSTD_IOBUF_SIZE); -+ if (in_allocated == NULL) { -+ error("Out of memory while allocating input buffer"); -+ err = -1; -+ goto out; -+ } -+ in_buf = in_allocated; -+ in_len = 0; -+ } -+ /* Read the first chunk, since we need to decode the frame header. 
*/ -+ if (fill != NULL) -+ in_len = fill(in_buf, ZSTD_IOBUF_SIZE); -+ if (in_len < 0) { -+ error("ZSTD-compressed data is truncated"); -+ err = -1; -+ goto out; -+ } -+ /* Set the first non-empty input buffer. */ -+ in.src = in_buf; -+ in.pos = 0; -+ in.size = in_len; -+ /* Allocate the output buffer if we are using flush(). */ -+ if (flush != NULL) { -+ out_allocated = large_malloc(ZSTD_IOBUF_SIZE); -+ if (out_allocated == NULL) { -+ error("Out of memory while allocating output buffer"); -+ err = -1; -+ goto out; -+ } -+ out_buf = out_allocated; -+ out_len = ZSTD_IOBUF_SIZE; -+ } -+ /* Set the output buffer. */ -+ out.dst = out_buf; -+ out.pos = 0; -+ out.size = out_len; -+ -+ /* -+ * We need to know the window size to allocate the ZSTD_DStream. -+ * Since we are streaming, we need to allocate a buffer for the sliding -+ * window. The window size varies from 1 KB to ZSTD_WINDOWSIZE_MAX -+ * (8 MB), so it is important to use the actual value so as not to -+ * waste memory when it is smaller. -+ */ -+ ret = ZSTD_getFrameParams(¶ms, in.src, in.size); -+ err = handle_zstd_error(ret, error); -+ if (err) -+ goto out; -+ if (ret != 0) { -+ error("ZSTD-compressed data has an incomplete frame header"); -+ err = -1; -+ goto out; -+ } -+ if (params.windowSize > ZSTD_WINDOWSIZE_MAX) { -+ error("ZSTD-compressed data has too large a window size"); -+ err = -1; -+ goto out; -+ } -+ -+ /* -+ * Allocate the ZSTD_DStream now that we know how much memory is -+ * required. -+ */ -+ wksp_size = ZSTD_DStreamWorkspaceBound(params.windowSize); -+ wksp = large_malloc(wksp_size); -+ dstream = ZSTD_initDStream(params.windowSize, wksp, wksp_size); -+ if (dstream == NULL) { -+ error("Out of memory while allocating ZSTD_DStream"); -+ err = -1; -+ goto out; -+ } -+ -+ /* -+ * Decompression loop: -+ * Read more data if necessary (error if no more data can be read). -+ * Call the decompression function, which returns 0 when finished. -+ * Flush any data produced if using flush(). -+ */ -+ if (in_pos != NULL) -+ *in_pos = 0; -+ do { -+ /* -+ * If we need to reload data, either we have fill() and can -+ * try to get more data, or we don't and the input is truncated. -+ */ -+ if (in.pos == in.size) { -+ if (in_pos != NULL) -+ *in_pos += in.pos; -+ in_len = fill ? fill(in_buf, ZSTD_IOBUF_SIZE) : -1; -+ if (in_len < 0) { -+ error("ZSTD-compressed data is truncated"); -+ err = -1; -+ goto out; -+ } -+ in.pos = 0; -+ in.size = in_len; -+ } -+ /* Returns zero when the frame is complete. */ -+ ret = ZSTD_decompressStream(dstream, &out, &in); -+ err = handle_zstd_error(ret, error); -+ if (err) -+ goto out; -+ /* Flush all of the data produced if using flush(). 
*/ -+ if (flush != NULL && out.pos > 0) { -+ if (out.pos != flush(out.dst, out.pos)) { -+ error("Failed to flush()"); -+ err = -1; -+ goto out; -+ } -+ out.pos = 0; -+ } -+ } while (ret != 0); -+ -+ if (in_pos != NULL) -+ *in_pos += in.pos; -+ -+ err = 0; -+out: -+ if (in_allocated != NULL) -+ large_free(in_allocated); -+ if (out_allocated != NULL) -+ large_free(out_allocated); -+ if (wksp != NULL) -+ large_free(wksp); -+ return err; -+} -+ -+#ifndef ZSTD_PREBOOT -+STATIC int INIT unzstd(unsigned char *buf, long len, -+ long (*fill)(void*, unsigned long), -+ long (*flush)(void*, unsigned long), -+ unsigned char *out_buf, -+ long *pos, -+ void (*error)(char *x)) -+{ -+ return __unzstd(buf, len, fill, flush, out_buf, 0, pos, error); -+} -+#else -+STATIC int INIT __decompress(unsigned char *buf, long len, -+ long (*fill)(void*, unsigned long), -+ long (*flush)(void*, unsigned long), -+ unsigned char *out_buf, long out_len, -+ long *pos, -+ void (*error)(char *x)) -+{ -+ return __unzstd(buf, len, fill, flush, out_buf, out_len, pos, error); -+} -+#endif --- -2.27.0.rc0 - - -From 9bf3c117ce579a699388ad7877b06be37f9f74b0 Mon Sep 17 00:00:00 2001 -From: Nick Terrell -Date: Tue, 31 Mar 2020 22:39:09 -0700 -Subject: [PATCH 4/9] init: add support for zstd compressed kernel - -* Adds the zstd cmd to scripts/Makefile.lib -* Adds the HAVE_KERNEL_ZSTD and KERNEL_ZSTD options - -Architecture specific support is still needed for decompression. - -Reviewed-by: Kees Cook -Tested-by: Sedat Dilek -Signed-off-by: Nick Terrell ---- - init/Kconfig | 15 ++++++++++++++- - scripts/Makefile.lib | 15 +++++++++++++++ - 2 files changed, 29 insertions(+), 1 deletion(-) - -diff --git a/init/Kconfig b/init/Kconfig -index 4f717bfdb..b7c280818 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -173,13 +173,16 @@ config HAVE_KERNEL_LZO - config HAVE_KERNEL_LZ4 - bool - -+config HAVE_KERNEL_ZSTD -+ bool -+ - config HAVE_KERNEL_UNCOMPRESSED - bool - - choice - prompt "Kernel compression mode" - default KERNEL_GZIP -- depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_XZ || HAVE_KERNEL_LZO || HAVE_KERNEL_LZ4 || HAVE_KERNEL_UNCOMPRESSED -+ depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_XZ || HAVE_KERNEL_LZO || HAVE_KERNEL_LZ4 || HAVE_KERNEL_ZSTD || HAVE_KERNEL_UNCOMPRESSED - help - The linux kernel is a kind of self-extracting executable. - Several compression algorithms are available, which differ -@@ -258,6 +261,16 @@ config KERNEL_LZ4 - is about 8% bigger than LZO. But the decompression speed is - faster than LZO. - -+config KERNEL_ZSTD -+ bool "ZSTD" -+ depends on HAVE_KERNEL_ZSTD -+ help -+ ZSTD is a compression algorithm targeting intermediate compression -+ with fast decompression speed. It will compress better than GZIP and -+ decompress around the same speed as LZO, but slower than LZ4. You -+ will need at least 192 KB RAM or more for booting. The zstd command -+ line tools is required for compression. -+ - config KERNEL_UNCOMPRESSED - bool "None" - depends on HAVE_KERNEL_UNCOMPRESSED -diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib -index f24ff5a90..373b1f9ab 100644 ---- a/scripts/Makefile.lib -+++ b/scripts/Makefile.lib -@@ -394,6 +394,21 @@ quiet_cmd_xzkern = XZKERN $@ - quiet_cmd_xzmisc = XZMISC $@ - cmd_xzmisc = cat $(real-prereqs) | xz --check=crc32 --lzma2=dict=1MiB > $@ - -+# ZSTD -+# --------------------------------------------------------------------------- -+# Appends the uncompressed size of the data using size_append. 
The .zst -+# format has the size information available at the beginning of the file too, -+# but it's in a more complex format and it's good to avoid changing the part -+# of the boot code that reads the uncompressed size. -+# Note that the bytes added by size_append will make the zstd tool think that -+# the file is corrupt. This is expected. -+ -+quiet_cmd_zstd = ZSTD $@ -+cmd_zstd = (cat $(filter-out FORCE,$^) | \ -+ zstd -19 && \ -+ $(call size_append, $(filter-out FORCE,$^))) > $@ || \ -+ (rm -f $@ ; false) -+ - # ASM offsets - # --------------------------------------------------------------------------- - --- -2.27.0.rc0 - - -From 1f95b525c42775a393bf9525767748f55fe38b53 Mon Sep 17 00:00:00 2001 -From: Nick Terrell -Date: Tue, 31 Mar 2020 22:39:10 -0700 -Subject: [PATCH 5/9] usr: add support for zstd compressed initramfs - -* Add support for a zstd compressed initramfs. -* Add compression for compressing built-in initramfs with zstd. - -I have tested this patch by boot testing with buildroot and QEMU. -Specifically, I booted the kernel with both a zstd and gzip compressed -initramfs, both built into the kernel and separate. I ensured that the -correct compression algorithm was used. I tested on arm, aarch64, i386, -and x86_64. - -This patch has been tested in production on aarch64 and x86_64 devices. - -Additionally, I have performance measurements from internal use in -production. On an aarch64 device we saw 19 second boot time improvement -from switching from lzma to zstd (27 seconds to 8 seconds). On an x86_64 -device we saw a 9 second boot time reduction from switching from xz to -zstd. - -Reviewed-by: Kees Cook -Tested-by: Sedat Dilek -Signed-off-by: Nick Terrell ---- - usr/Kconfig | 20 ++++++++++++++++++++ - usr/Makefile | 1 + - 2 files changed, 21 insertions(+) - -diff --git a/usr/Kconfig b/usr/Kconfig -index 96afb03b6..2599bc21c 100644 ---- a/usr/Kconfig -+++ b/usr/Kconfig -@@ -100,6 +100,15 @@ config RD_LZ4 - Support loading of a LZ4 encoded initial ramdisk or cpio buffer - If unsure, say N. - -+config RD_ZSTD -+ bool "Support initial ramdisk/ramfs compressed using ZSTD" -+ default y -+ depends on BLK_DEV_INITRD -+ select DECOMPRESS_ZSTD -+ help -+ Support loading of a ZSTD encoded initial ramdisk or cpio buffer. -+ If unsure, say N. -+ - choice - prompt "Built-in initramfs compression mode" - depends on INITRAMFS_SOURCE != "" -@@ -196,6 +205,17 @@ config INITRAMFS_COMPRESSION_LZ4 - If you choose this, keep in mind that most distros don't provide lz4 - by default which could cause a build failure. - -+config INITRAMFS_COMPRESSION_ZSTD -+ bool "ZSTD" -+ depends on RD_ZSTD -+ help -+ ZSTD is a compression algorithm targeting intermediate compression -+ with fast decompression speed. It will compress better than GZIP and -+ decompress around the same speed as LZO, but slower than LZ4. -+ -+ If you choose this, keep in mind that you may need to install the zstd -+ tool to be able to compress the initram. 
-+ - config INITRAMFS_COMPRESSION_NONE - bool "None" - help -@@ -221,10 +195,12 @@ - default ".xz" if INITRAMFS_COMPRESSION_XZ - default ".lzo" if INITRAMFS_COMPRESSION_LZO - default ".lz4" if INITRAMFS_COMPRESSION_LZ4 -+ default ".zst" if INITRAMFS_COMPRESSION_ZSTD - default ".gz" if RD_GZIP - default ".lz4" if RD_LZ4 - default ".lzo" if RD_LZO - default ".xz" if RD_XZ - default ".lzma" if RD_LZMA - default ".bz2" if RD_BZIP2 -+ default ".zst" if RD_ZSTD - default "" --- -2.27.0.rc0 - - -From b87b45972a43502a4a916e014e13b74997f824aa Mon Sep 17 00:00:00 2001 -From: Nick Terrell -Date: Tue, 31 Mar 2020 22:39:11 -0700 -Subject: [PATCH 6/9] x86: bump ZO_z_extra_bytes margin for zstd - -Bump the ZO_z_extra_bytes margin for zstd. - -Zstd needs 3 bytes per 128 KB, and has a 22 byte fixed overhead. -Zstd needs to maintain 128 KB of space at all times, since that is -the maximum block size. See the comments regarding in-place -decompression added in lib/decompress_unzstd.c for details. - -Reviewed-by: Kees Cook -Tested-by: Sedat Dilek -Signed-off-by: Nick Terrell ---- - arch/x86/boot/header.S | 8 +++++++- - 1 file changed, 7 insertions(+), 1 deletion(-) - -diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S -index 97d9b6d6c..b820875c5 100644 ---- a/arch/x86/boot/header.S -+++ b/arch/x86/boot/header.S -@@ -536,8 +536,14 @@ pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr - # the size-dependent part now grows so fast. - # - # extra_bytes = (uncompressed_size >> 8) + 65536 -+# -+# ZSTD compressed data grows by at most 3 bytes per 128K, and only has a 22 -+# byte fixed overhead but has a maximum block size of 128K, so it needs a -+# larger margin. -+# -+# extra_bytes = (uncompressed_size >> 8) + 131072 - --#define ZO_z_extra_bytes ((ZO_z_output_len >> 8) + 65536) -+#define ZO_z_extra_bytes ((ZO_z_output_len >> 8) + 131072) - #if ZO_z_output_len > ZO_z_input_len - # define ZO_z_extract_offset (ZO_z_output_len + ZO_z_extra_bytes - \ - ZO_z_input_len) --- -2.27.0.rc0 - - -From 8b89e11b22775c5a3bad1d36b785baf781289304 Mon Sep 17 00:00:00 2001 -From: Nick Terrell -Date: Tue, 31 Mar 2020 22:39:12 -0700 -Subject: [PATCH 7/9] x86: Add support for ZSTD compressed kernel - -* Add support for zstd compressed kernel -* Bump the heap size for zstd. -* Update the documentation. - -Integrates the ZSTD decompression code to the x86 pre-boot code. - -Zstandard requires slightly more memory during the kernel decompression -on x86 (192 KB vs 64 KB), and the memory usage is independent of the -window size. - -This patch has been boot tested with both a zstd and gzip compressed -kernel on i386 and x86_64 using buildroot and QEMU. - -Additionally, this has been tested in production on x86_64 devices. -We saw a 2 second boot time reduction by switching kernel compression -from xz to zstd. - -Reviewed-by: Kees Cook -Tested-by: Sedat Dilek -Signed-off-by: Nick Terrell ---- - Documentation/x86/boot.rst | 6 +++--- - arch/x86/Kconfig | 1 + - arch/x86/boot/compressed/Makefile | 5 ++++- - arch/x86/boot/compressed/misc.c | 4 ++++ - arch/x86/include/asm/boot.h | 6 ++++-- - 5 files changed, 16 insertions(+), 6 deletions(-) - -diff --git a/Documentation/x86/boot.rst b/Documentation/x86/boot.rst -index c9c201596..cedcf4d49 100644 ---- a/Documentation/x86/boot.rst -+++ b/Documentation/x86/boot.rst -@@ -786,9 +786,9 @@ Protocol: 2.08+ - uncompressed data should be determined using the standard magic - numbers. 
The currently supported compression formats are gzip - (magic numbers 1F 8B or 1F 9E), bzip2 (magic number 42 5A), LZMA -- (magic number 5D 00), XZ (magic number FD 37), and LZ4 (magic number -- 02 21). The uncompressed payload is currently always ELF (magic -- number 7F 45 4C 46). -+ (magic number 5D 00), XZ (magic number FD 37), LZ4 (magic number -+ 02 21) and ZSTD (magic number 28 B5). The uncompressed payload is -+ currently always ELF (magic number 7F 45 4C 46). - - ============ ============== - Field name: payload_length -diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig -index beea77046..12d88997a 100644 ---- a/arch/x86/Kconfig -+++ b/arch/x86/Kconfig -@@ -183,6 +183,7 @@ config X86 - select HAVE_KERNEL_LZMA - select HAVE_KERNEL_LZO - select HAVE_KERNEL_XZ -+ select HAVE_KERNEL_ZSTD - select HAVE_KPROBES - select HAVE_KPROBES_ON_FTRACE - select HAVE_FUNCTION_ERROR_INJECTION -diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile -index 26050ae0b..8233f598f 100644 ---- a/arch/x86/boot/compressed/Makefile -+++ b/arch/x86/boot/compressed/Makefile -@@ -24,7 +24,7 @@ OBJECT_FILES_NON_STANDARD := y - KCOV_INSTRUMENT := n - - targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \ -- vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4 -+ vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4 vmlinux.bin.zst - - KBUILD_CFLAGS := -m$(BITS) -O2 - KBUILD_CFLAGS += -fno-strict-aliasing $(call cc-option, -fPIE, -fPIC) -@@ -145,6 +145,8 @@ $(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE - $(call if_changed,lzo) - $(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) FORCE - $(call if_changed,lz4) -+$(obj)/vmlinux.bin.zst: $(vmlinux.bin.all-y) FORCE -+ $(call if_changed,zstd) - - suffix-$(CONFIG_KERNEL_GZIP) := gz - suffix-$(CONFIG_KERNEL_BZIP2) := bz2 -@@ -152,6 +154,7 @@ suffix-$(CONFIG_KERNEL_LZMA) := lzma - suffix-$(CONFIG_KERNEL_XZ) := xz - suffix-$(CONFIG_KERNEL_LZO) := lzo - suffix-$(CONFIG_KERNEL_LZ4) := lz4 -+suffix-$(CONFIG_KERNEL_ZSTD) := zst - - quiet_cmd_mkpiggy = MKPIGGY $@ - cmd_mkpiggy = $(obj)/mkpiggy $< > $@ -diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c -index 9652d5c2a..39e592d0e 100644 ---- a/arch/x86/boot/compressed/misc.c -+++ b/arch/x86/boot/compressed/misc.c -@@ -77,6 +77,10 @@ static int lines, cols; - #ifdef CONFIG_KERNEL_LZ4 - #include "../../../../lib/decompress_unlz4.c" - #endif -+ -+#ifdef CONFIG_KERNEL_ZSTD -+#include "../../../../lib/decompress_unzstd.c" -+#endif - /* - * NOTE: When adding a new decompressor, please update the analysis in - * ../header.S. -diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h -index 680c32036..d6dd43d25 100644 ---- a/arch/x86/include/asm/boot.h -+++ b/arch/x86/include/asm/boot.h -@@ -24,9 +24,11 @@ - # error "Invalid value for CONFIG_PHYSICAL_ALIGN" - #endif - --#ifdef CONFIG_KERNEL_BZIP2 -+#if defined(CONFIG_KERNEL_BZIP2) - # define BOOT_HEAP_SIZE 0x400000 --#else /* !CONFIG_KERNEL_BZIP2 */ -+#elif defined(CONFIG_KERNEL_ZSTD) -+# define BOOT_HEAP_SIZE 0x30000 -+#else - # define BOOT_HEAP_SIZE 0x10000 - #endif - --- -2.27.0.rc0 - - -From 0d7151172b1ffad727607d69acc2b51a6086def1 Mon Sep 17 00:00:00 2001 -From: Adam Borowski -Date: Tue, 31 Mar 2020 22:39:13 -0700 -Subject: [PATCH 8/9] .gitignore: add ZSTD-compressed files - -For now, that's arch/x86/boot/compressed/vmlinux.bin.zst but probably more -will come, thus let's be consistent with all other compressors. 
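The vmlinux.bin.zst named here is produced by the cmd_zstd rule from patch 4: the payload is compressed with zstd -19 and the uncompressed size is appended as four little-endian bytes by Kbuild's size_append, which is what the x86 boot code reads instead of parsing the .zst frame header. After a build the trailer can be inspected directly (path assumed, little-endian host):

tail -c 4 arch/x86/boot/compressed/vmlinux.bin.zst | od -An -tu4
# prints the uncompressed payload size; these same trailing bytes are what
# make 'zstd -t' call the file corrupt, as the Makefile comment above notes
# is expected.
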
- -Tested-by: Sedat Dilek -Reviewed-by: Kees Cook -Signed-off-by: Nick Terrell -Signed-off-by: Adam Borowski ---- - .gitignore | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/.gitignore b/.gitignore -index 72ef86a55..edb0191c2 100644 ---- a/.gitignore -+++ b/.gitignore -@@ -43,6 +43,7 @@ - *.tab.[ch] - *.tar - *.xz -+*.zst - Module.symvers - modules.builtin - modules.order --- -2.27.0.rc0 - - -From 9687fd64db136fbdb2e7805e0b6354620516597c Mon Sep 17 00:00:00 2001 -From: NihilisticPandemoniun - <152417+nihilisticpandemonium@users.noreply.github.com> -Date: Thu, 19 Dec 2019 00:08:37 -0800 -Subject: [PATCH 9/9] init: add support for zstd compressed modules - -Signed-off-by: Alexandre Frade ---- - Makefile | 7 +++++-- - init/Kconfig | 9 ++++++--- - 2 files changed, 11 insertions(+), 5 deletions(-) - -diff --git a/Makefile b/Makefile -index d25221966..f812df12f 100644 ---- a/Makefile -+++ b/Makefile -@@ -972,8 +972,8 @@ endif # INSTALL_MOD_STRIP - export mod_strip_cmd - - # CONFIG_MODULE_COMPRESS, if defined, will cause module to be compressed --# after they are installed in agreement with CONFIG_MODULE_COMPRESS_GZIP --# or CONFIG_MODULE_COMPRESS_XZ. -+# after they are installed in agreement with CONFIG_MODULE_COMPRESS_GZIP, -+# CONFIG_MODULE_COMPRESS_XZ, or CONFIG_MODULE_COMPRESS_ZSTD. - - mod_compress_cmd = true - ifdef CONFIG_MODULE_COMPRESS -@@ -983,6 +983,9 @@ ifdef CONFIG_MODULE_COMPRESS - ifdef CONFIG_MODULE_COMPRESS_XZ - mod_compress_cmd = xz -f - endif # CONFIG_MODULE_COMPRESS_XZ -+ ifdef CONFIG_MODULE_COMPRESS_ZSTD -+ mod_compress_cmd = zstd -T0 -20 --ultra --rm -f -+ endif - endif # CONFIG_MODULE_COMPRESS - export mod_compress_cmd - -diff --git a/init/Kconfig b/init/Kconfig -index b7c280818..3d3b72926 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -2142,8 +2142,8 @@ config MODULE_COMPRESS - bool "Compress modules on installation" - help - -- Compresses kernel modules when 'make modules_install' is run; gzip or -- xz depending on "Compression algorithm" below. -+ Compresses kernel modules when 'make modules_install' is run; gzip, -+ xz, or zstd depending on "Compression algorithm" below. - - module-init-tools MAY support gzip, and kmod MAY support gzip and xz. - -@@ -2165,7 +2165,7 @@ choice - This determines which sort of compression will be used during - 'make modules_install'. - -- GZIP (default) and XZ are supported. -+ GZIP (default), XZ, and ZSTD are supported. 
- - config MODULE_COMPRESS_GZIP - bool "GZIP" -@@ -2173,6 +2173,9 @@ config MODULE_COMPRESS_GZIP - config MODULE_COMPRESS_XZ - bool "XZ" - -+config MODULE_COMPRESS_ZSTD -+ bool "ZSTD" -+ - endchoice - - config MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS --- -2.27.0.rc0 - diff --git a/sys-kernel/calculate-sources/Manifest b/sys-kernel/calculate-sources/Manifest index f60b71fd8..c0d1e15a5 100644 --- a/sys-kernel/calculate-sources/Manifest +++ b/sys-kernel/calculate-sources/Manifest @@ -1,9 +1,6 @@ DIST linux-5.10.tar.xz 116606704 BLAKE2B b923d7b66309224f42f35f8a5fa219421b0a9362d2adacdadd8d96251f61f7230878ea297a269a7f3b3c56830f0b177e068691e1d7f88501a05653b0a13274d1 SHA512 95bc137d0cf9148da6a9d1f1a878698dc27b40f68e22c597544010a6c591ce1b256f083489d3ff45ff77753289b535135590194d88ef9f007d0ddab3d74de70e DIST linux-5.14.tar.xz 120669872 BLAKE2B 0047f5aaa3940dff97f4055ef544faafbbb5282128e6afe21d2f47d8dc8c395806a17016febfa050117d16f59e74b882cb8b9c5011d68f119c230d0a4d120524 SHA512 8e4f3ec3d36f774280f75dc7b004a43e09417af58f12e9c9f8348976659d4cfda7ad905f306f43fed66a27922e5c45db22e46bbfa7a0b9f365012380de3b6f64 DIST linux-5.15.tar.xz 121913744 BLAKE2B 3921274b23f7938abdf3ed9334534b4581e13d7484303d3a5280eddb038999aaa8b836666a487472d9c4a219af0f06b9fecccaf348fb5510ab8762f4ef4b7e83 SHA512 d25ad40b5bcd6a4c6042fd0fd84e196e7a58024734c3e9a484fd0d5d54a0c1d87db8a3c784eff55e43b6f021709dc685eb0efa18d2aec327e4f88a79f405705a -DIST linux-5.4.tar.xz 109441440 BLAKE2B 193bc4a3147e147d5529956164ec4912fad5d5c6fb07f909ff1056e57235834173194afc686993ccd785c1ff15804de0961b625f3008cca0e27493efc8f27b13 SHA512 9f60f77e8ab972b9438ac648bed17551c8491d6585a5e85f694b2eaa4c623fbc61eb18419b2656b6795eac5deec0edaa04547fc6723fbda52256bd7f3486898f DIST patch-5.10.73.xz 2213532 BLAKE2B 8069486891bd8bd2d6f15204f6c18848c374a650254a4b381126407fdc3136b625f8e13e3a3ba93817f71c80297a5ad7c4f25a8f43330f0017f37396c033cf95 SHA512 630d564b49ea9e5d67ac2395b312e653ba8aa28011cd5565ad506b0296c7598eebbf2e7a792e06b4c90e75bb764e0c2dc0246b11d496e7ad5533c0e177d887cc DIST patch-5.10.77.xz 2272320 BLAKE2B 209beaa7f6cbc12e9b40370cad2fa30cb062be843aa669cff1908742ceed5eb65a1f1b0d5b2e55379907065b5d86c431f8db80ef01e58bf77e4552e4163e7938 SHA512 51d222ab9622e37dcfb3ec4bba867b043880179e38fb991da48b86183a83f3f1af00485b461f757a7e58899e062738b9e8f7caced550dd2660fef33e5afe3262 DIST patch-5.14.16.xz 602960 BLAKE2B 467f64556136fb2f961042f9a99a8e80f651b4f3476e88d908f04193a8fed7975e8685e4867cdd16e2e0912e0e22931727502958c61ba0940cd3bc4395a7ce8e SHA512 c909aea0ca239d32e06a8602b154397de3885588410738d099d6a13f0149766fd106e6e7e235b0c4a3873d8155e403a371d82a85af5592c619d1ce683800c811 -DIST patch-5.4.124.xz 3006220 BLAKE2B 394645f0cf7898c98d416e93858b3effe171d5bdbc968bccd876c41de527ce0221331a06744cc081a05407d3b539d7ab83ae526ff1cc99e9e17629af7b968932 SHA512 9a8a5388d921c55a6f620f2da0528c4d0ed4487cfa58ac876b7b9625247860e3b25bbfcd39b4ae73f34c2d2b8a45b155a149613a650a1306bdab4bad57f8f9e9 -DIST patch-5.4.157.xz 3446952 BLAKE2B e443b31e2c505886009b8625638d678d974f6223291c3ef079189df341f3700cca208b350150e14d284269ec0143d228277d1a0edb35e59808e268ffe3fddfc0 SHA512 b274006c15c7a09ad459f59d90fe88128b15a4a4eaf984bce97ebc5b701fa83a28ebfb5fb44d7c5086f73ee99a212b137b2de06a5bfc1b1c889a63f8840afeb2 diff --git a/sys-kernel/calculate-sources/calculate-sources-5.4.124.ebuild b/sys-kernel/calculate-sources/calculate-sources-5.4.124.ebuild deleted file mode 100644 index 6ac1be698..000000000 --- a/sys-kernel/calculate-sources/calculate-sources-5.4.124.ebuild +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright 1999-2015 
Gentoo Foundation -# Distributed under the terms of the GNU General Public License v2 -# $Header: $ - -EAPI=7 -ETYPE="sources" - -inherit calculate-kernel-8 eutils - -DESCRIPTION="Calculate Linux kernel image" -KEYWORDS="amd64" -HOMEPAGE="http://www.calculate-linux.org" - -SRC_URI="${KERNEL_URI} ${ARCH_URI}" - -IUSE="fsync muqss uksm" - -src_unpack() { - calculate-kernel-8_src_unpack -} - -pkg_postinst() { - calculate-kernel-8_pkg_postinst -} diff --git a/sys-kernel/calculate-sources/calculate-sources-5.4.157.ebuild b/sys-kernel/calculate-sources/calculate-sources-5.4.157.ebuild deleted file mode 100644 index 63c59c18b..000000000 --- a/sys-kernel/calculate-sources/calculate-sources-5.4.157.ebuild +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright 1999-2015 Gentoo Foundation -# Distributed under the terms of the GNU General Public License v2 -# $Header: $ - -EAPI=7 -ETYPE="sources" - -inherit calculate-kernel-8 eutils - -DESCRIPTION="Calculate Linux kernel image" -KEYWORDS="~amd64" -HOMEPAGE="http://www.calculate-linux.org" - -SRC_URI="${KERNEL_URI} ${ARCH_URI}" - -IUSE="fsync muqss uksm" - -src_unpack() { - calculate-kernel-8_src_unpack -} - -pkg_postinst() { - calculate-kernel-8_pkg_postinst -}
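
The 5.4 ebuilds removed here accompany the 5.4 patch set deleted earlier in this diff, including the zstd module-compression option from patch 9. With CONFIG_MODULE_COMPRESS_ZSTD enabled, 'make modules_install' would run roughly the following for every installed module (hypothetical module path, shown only to spell out the flags the patch hard-codes):

zstd -T0 -20 --ultra --rm -f /lib/modules/5.4.157-calculate/kernel/fs/xfs/xfs.ko
# xfs.ko is replaced by xfs.ko.zst; loading the result at runtime requires a
# kmod built with zstd support.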