已经是最新一篇文章了!
已经是最后一篇文章了!
后台服务绑定 CPU 核
一个 Prod 用的服务启动升级脚本,检查了 CPU 核心数,并按照指定顺序绑定 CPU 核心,以提升性能。
#!/bin/bash
SERVICENAME=$1
VERSION=$2
BINPATH=/home/devops/$SERVICENAME/bin/
PACKAGE=/home/devops/$SERVICENAME/package/
# xxx 进程名|进程文件名
PROCESS=$SERVICENAME.exe
LOCK_FILE="/$BINPATH/.xxxxx.$PROCESS.lock"
# we think cpu binding means will enable cpu perception
USE_CPU_TOPOLOGY_SERVER_TYPE="server"
USE_CPU_TOPOLOGY_TAG="vm_use_cpu_topology:true vm_bind_cpu:true"
USE_CPU_TOPOLOGY_PROVIDER="aws"
USE_CPU_TOPOLOGY_IDC=""
USE_CPU_TOPOLOGY=
# control bind core and xdp in hybrid deployment machine
is_hybrid=0
function CheckDir() {
if [ ! -d $BINPATH ]; then sudo mkdir -p $BINPATH; fi
}
function CheckHybridDeploy() {
get_message=`curl -4 https://meta.xxxxx.co/latest/meta-data/tag --max-time 10`
match=`echo ${get_message} | grep -a "servicevosmixed:1" | wc -l`
if [ ${match} -gt 0 ]; then
is_hybrid=1
fi
echo "Is hybrid : ${is_hybrid}"
}
# 排他锁,防止相同时间内进行其它操作,比如操作时,防止zabbix触发自动拉起
function Lock() {
ls "$LOCK_FILE" >/dev/null 2>&1
if [[ $? -ne 0 ]]; then
sudo touch "$LOCK_FILE"
else
lockTime=$(stat -t "$LOCK_FILE" | awk '{print $12}')
currentTime=$(date +%s)
duration=$((currentTime - lockTime))
echo $duration
if [[ $duration -gt 20 ]]; then
sudo touch "$LOCK_FILE"
else
echo "Less than 20 seconds since last operation"
exit 1
fi
fi
}
function Unlock() {
sudo rm -f "$LOCK_FILE"
}
function NeedUseCPUTopology() { # not zero mean need bind cpu
if [[ -n ${USE_CPU_TOPOLOGY} ]]; then
return ${USE_CPU_TOPOLOGY}
fi
# check is vm
if [[ "${USE_CPU_TOPOLOGY_SERVER_TYPE}" == *"$(hostnamectl | grep Chassis | cut -d ':' -f2 | xargs)"* ]]; then
USE_CPU_TOPOLOGY=1
return ${USE_CPU_TOPOLOGY}
fi
# get machine tag
for machine_tag in $(timeout 10 curl https://meta.xxxxx.co/2022-09-13/meta-data/tag 2>/dev/null); do
if [[ "${USE_CPU_TOPOLOGY_TAG}" == *"${machine_tag}"* ]]; then
USE_CPU_TOPOLOGY=1
return ${USE_CPU_TOPOLOGY}
fi
done
# # get cluster name
# if [[ "${USE_CPU_TOPOLOGY_IDC}" == *"$(curl https://meta.xxxxx.co/2022-09-13/meta-data/cluster 2>/dev/null)"* ]]; then
# USE_CPU_TOPOLOGY=1;
# return ${USE_CPU_TOPOLOGY}
# fi
# get provider
# if [[ "${USE_CPU_TOPOLOGY_PROVIDER}" == *"$(curl https://meta.xxxxx.co/2022-09-13/meta-data/provider 2>/dev/null)"* ]]; then
# USE_CPU_TOPOLOGY=1;
# return ${USE_CPU_TOPOLOGY}
# fi
USE_CPU_TOPOLOGY=0
return ${USE_CPU_TOPOLOGY}
}
# 获取当前机器物理核心数
function GetCpuNumber() {
# if not use topology, we will presume machine have open HT, and just return all process count / 2
NeedUseCPUTopology && { echo $(($(grep -c processor /proc/cpuinfo) / 2)) && return 0; }
CORE=$(cat /proc/cpuinfo | grep "core id" | sort | uniq | wc -l)
PHYSICAL=$(cat /proc/cpuinfo | grep "physical id" | sort | uniq | wc -l)
LOGIC=$((CORE * PHYSICAL))
echo $LOGIC
}
function GetMemTotalSize() {
memTotalSize=$(cat /proc/meminfo | grep MemTotal | awk '{print $2}')
echo $memTotalSize
}
# 设置进程数
function GetProcessNumber() {
local processNumber=1
cpuNumber=$(GetCpuNumber)
memTotalSize=$(GetMemTotalSize)
if [ $cpuNumber -ge 10 ]; then
processNumber_tmp=$((cpuNumber - 2))
# 内存总容量足够的机器按满配起
if [[ $memTotalSize -ge $((processNumber_tmp * 2359296)) ]]; then
processNumber_pre=$processNumber_tmp
if [[ $approuter_running -eq 1 ]]; then
processNumber=$((processNumber_pre - 4))
else
processNumber=$processNumber_pre
fi
# 内存总容量不够的机器计算实际要起的进程数
else
processNumber_pre=$((memTotalSize / 2359296))
if [[ $approuter_running -eq 1 ]]; then
processNumber=$((processNumber_pre - 4))
else
processNumber=$processNumber_pre
fi
fi
elif [[ $cpuNumber -gt 1 ]]; then
processNumber_pre=$((cpuNumber - 1))
if [[ $approuter_running -eq 1 ]]; then
processNumber=$((processNumber_pre - 4))
else
processNumber=$processNumber_pre
fi
elif [[ $cpuNumber -eq 1 ]]; then
processNumber=1
fi
if [ $processNumber -gt 30 ]; then
processNumber=30
fi
echo $processNumber
}
function CheckApprouterProcessRunningStaus() {
approuterNumber=$(ps aux | grep -w "reuseport.exe" | grep -v grep | wc -l)
if [ $approuterNumber -gt 0 ]; then
approuter_running=1
else
approuter_running=0
fi
}
# 检查当前进程数
function CheckProcessNumber() {
processNumber=$(GetProcessNumber)
currentNumber=$(ps aux | grep -w "./$PROCESS" | grep -v 'gzip' | grep -v grep | grep -v $0 | grep -v "appRestart\|appStart\|appStop\|appUpgrade" | wc -l)
if [[ $currentNumber -ne $processNumber ]]; then
echo "current process number: $currentNumber, need process number: $processNumber"
Unlock
exit 1
fi
}
# 检查当前进程数
function CheckNoProcess() {
currentNumber=$(ps aux | grep -w "./$PROCESS" | grep -v 'gzip' | grep -v grep | grep -v $0 | grep -v "appRestart\|appStart\|appStop\|appUpgrade" | wc -l)
if [[ $currentNumber -ne 0 ]]; then
echo "current process number: $currentNumber, should be 0"
Unlock
exit 1
fi
}
# 停止进程
function Stop() {
sudo killall -q "$PROCESS"
sleep 30
echo "Stop Process finished"
CheckNoProcess
}
# 升级程序
function Upgrade() {
#检查文件是否存在
if test ! -e "$PACKAGE/$SERVICENAME.exe"; then
echo "upgrade package not exist"
exit 1
fi
#检查程序及版本
local exeVersion=$("$PACKAGE/$SERVICENAME.exe" -v | awk '{sub(/^[\t ]*/, "");print}')
local upVersion="$PROCESS"_"$exeVersion"
if [[ "$upVersion" != "$VERSION" ]]; then
echo "version mismatch, package version: $upVersion, need version: $VERSION"
Unlock
exit 1
fi
if test ! -d "$BINPATH"; then
sudo mkdir -p "$BINPATH"
fi
# copy程序到bin下
sudo cp -f "$PACKAGE/$SERVICENAME.exe" "$BINPATH/$PROCESS"
}
# 启动进程
function Start() {
cd "$BINPATH"
sudo bash -c "echo 3 > /proc/sys/vm/drop_caches"
sleep 10
processNumber=$(GetProcessNumber)
currentNumber=$(ps aux | grep -w "./$PROCESS" | grep -v 'gzip' | grep -v grep | grep -v $0 | grep -v "appRestart\|appStart\|appStop\|appUpgrade" | wc -l)
echo "Need: $processNumber, Current: $currentNumber"
run_process_command=${PROCESS}
if [ ${is_hybrid} -eq 1 ]; then
run_process_command="${run_process_command} --disable_xdp=true"
fi
for ((i = currentNumber; i < "$processNumber"; i++)); do
sudo ./$run_process_command
sleep 3
done
}
# 绑定核心
function BindingProcesstoPhysicalCores() {
NeedUseCPUTopology && return 0
if [ ${is_hybrid} -eq 1 ]; then
return 0
fi
TOTAL_CPU=$(grep -c processor /proc/cpuinfo)
CORE=$(cat /proc/cpuinfo | grep "core id" | sort | uniq | wc -l)
PHYSICAL=$(cat /proc/cpuinfo | grep "physical id" | sort | uniq | wc -l)
LOGIC=$((CORE * PHYSICAL))
PIDS=$(ps aux | grep -w "./$PROCESS" | grep -v grep | grep -v $0 | grep -v "start\|ansible" | awk '{print $2}')
if [[ $(($TOTAL_CPU / 2)) -eq $LOGIC ]]; then
if [[ $(cat /sys/devices/system/cpu/cpu0/topology/thread_siblings_list | grep - | wc -l) -eq 1 ]]; then
cpu_thread_seq="continuous" #CPU-No. vs Core ID Like CPU0-Core0_Thread0,CPU1-Core0_Thread1,CPU2-Core1_Thread0,CPU3-Core1_Thread1...
else
cpu_thread_seq="discontinuous" #CPU-No. vs Core ID Like CPU0-Core0_Thread0,CPU1-Core1_Thread0,..,CPU10-Core0_Thread1,CPU1-Core0_Thread1...
fi
elif [[ $TOTAL_CPU -eq $LOGIC ]]; then
cpu_thread_seq="na" # CPU do not enable or not support Hyper-Threading
fi
if [[ $cpu_thread_seq = "discontinuous" ]]; then
if [[ $approuter_running -eq 1 ]]; then #有app_router保留4个核心,从倒数第5个物理核开始使用
cpu=$(($LOGIC - 5))
else
cpu=$(($LOGIC - 1)) #没有app_router正常从最后1个物理核开始使用
fi
for pid in ${PIDS[@]}; do
sudo taskset -p --cpu-list $cpu $pid
cpu=$(($cpu - 1))
done
elif [[ $cpu_thread_seq = "continuous" ]]; then
if [[ $approuter_running -eq 1 ]]; then #有app_router保留4个核心即8个逻辑核,从倒数第10个逻辑核开始使用
cpu=$(($TOTAL_CPU - 10))
else
cpu=$(($TOTAL_CPU - 2)) #没有app_router正常从倒数第2个逻辑核开始用
fi
for pid in ${PIDS[@]}; do
sudo taskset -p --cpu-list $cpu $pid
cpu=$(($cpu - 2))
done
elif [[ $cpu_thread_seq = "na" ]]; then
for pid in ${PIDS[@]}; do
sudo taskset -p 0xFFFF $pid
done
fi
}
CheckApprouterProcessRunningStaus &&
CheckDir &&
CheckHybridDeploy &&
Lock &&
Stop &&
Upgrade &&
Start &&
BindingProcesstoPhysicalCores &&
CheckProcessNumber
Unlock
版权声明: 如无特别声明,本文版权归 Mr Chen 所有,转载请注明本文链接。
(采用 CC BY-NC-SA 4.0 许可协议进行授权)
本文标题:《 后台服务绑定 CPU 核 》
本文链接:https://gbcpp.github.io/service-bind-cpu.html
本文最后一次更新为 天前,文章中的某些内容可能已过时!