auto-krokodil/kroko-cli.sh
2022-10-27 23:41:01 +03:00

416 lines
9.4 KiB
Bash
Executable file

#!/bin/bash
# Strict mode: stop on failing commands (-e), on use of unset variables (-u)
# and when any stage of a pipeline fails (pipefail).
set -e -u -o pipefail
# Keep pathname expansion (globbing) enabled.
set +f

# Example of the lspci output this script parses:
#   lspci -nn | grep VGA
#   01:00.0 VGA compatible controller [0300]: NVIDIA Corporation GF108 [GeForce GT 440] [10de:0de0] (rev a1)

# Exit codes returned by the commands of this script
readonly \
  EXIT_GENERICERROR=1 \
  EXIT_ENODRIVER=10 \
  EXIT_ENOGPU=20 \
  EXIT_ENOCOMMONDRIVER=30 \
  EXIT_ENOPERM=40

# Set to 1 to use a canned lspci line instead of calling lspci
# (for development on hardware without Nvidia)
KROKO_FAKE_LSPCI="${KROKO_FAKE_LSPCI:-0}"

# Extra options put in front of every dnf subcommand, e.g.
# "--repofrompath xxx,local_repo" when running from inside Anaconda
KROKO_DNF_OPTS="${KROKO_DNF_OPTS:-}"

# Directory for all temporary files; a fresh one is created unless the caller
# provides one. It is removed when the shell exits.
KROKO_TMPDIR="${KROKO_TMPDIR:-$(mktemp -d)}"
trap 'rm -rf "$KROKO_TMPDIR"' EXIT
# $@: words to print
# Same as the builtin echo (including its option handling), but the text
# goes to stderr instead of stdout
_echo_err() {
  echo "$@" >&2
}
# Print the usage text to stdout.
# The text is surrounded by one empty line before and one after.
_echo_help() {
  printf '%s\n' \
    "" \
    "Usage: $0 [ get-gpus | best-driver | autoinstall ]" \
    "get-gpus — list NVIDIA GPUs" \
    "best-driver — print which NVIDIA driver best fits this system" \
    "autoinstall — automatically install the best NVIDIA driver for this system" \
    "help — print this help" \
    "Example:" \
    "$0 autoinstall" \
    "(run from root)" \
    ""
}
# $@: optional extra arguments passed through to mktemp
# Create a temporary file inside $KROKO_TMPDIR and print its path
# shellcheck disable=SC2120
_mktemp() {
  local -a location=( "--tmpdir=${KROKO_TMPDIR}" )
  mktemp "${location[@]}" "$@"
}
# $@: args for lspci (e.g. -nn)
# Wrapper around lspci: when KROKO_FAKE_LSPCI is 1, a single canned line
# describing a fake Nvidia GPU is printed and the real lspci is not called
_lspci() {
  case "$KROKO_FAKE_LSPCI" in
    1)
      echo "00:02.0 VGA compatible controller [0300]: Fake Nvidia 470 [10de:1c92] (rev 01)"
      ;;
    *)
      lspci "$@"
      ;;
  esac
}
# $1: string
# $2: symbol (a single character)
# Count how many times a symbol is inside the string and print the number.
# Anything that is not exactly one character (including an empty symbol)
# never matches, so 0 is printed for it.
_count_symbol_in_string() {
  local str="$1"
  local symbol="$2"
  local count=0
  # the loop index is local so that callers' variables are not clobbered
  local i
  for (( i = 0; i < ${#str}; i++ )); do
    if [ "${str:i:1}" = "$symbol" ]; then
      count=$((count + 1))
    fi
  done
  echo "$count"
}
# $1: string
# $2: number (starting from 1) of the [...] block to extract from
# Prints the text between the $2-th "[" and the "]" that follows it;
# prints an empty line if there is no such block.
# Example:
# $1: ff [ty] xz [56] yu
# if $2=1; then "ty" is returned
# if $2=2; then "56" is returned
_element_from_string() {
  local str="$1"
  local target="$2"
  local result=""
  # 1 while the current position is inside the requested block
  local inside=0
  # number of "[" seen so far
  local seen=0
  # loop variables are local so that callers' variables are not clobbered
  local i ch
  for (( i = 0; i < ${#str}; i++ )); do
    ch="${str:i:1}"
    case "$ch" in
      '[')
        seen=$((seen + 1))
        # compared as strings, like the rest of the script does
        if [ "$seen" = "$target" ]; then
          inside=1
        fi
        ;;
      ']')
        inside=0
        ;;
      *)
        # symbols between [ and ] of the requested block
        if [ "$inside" = 1 ]; then
          result+="$ch"
        fi
        ;;
    esac
  done
  printf '%s\n' "$result"
}
# $1: line from lspci -nn
# Prints the device ID taken from the last [...] block of the line
# (10de:0de0 -> 0de0). Prints an empty line, without failing under `set -u`,
# when the last block has no "vendor:device" pair, so that callers can test
# the result for emptiness.
_extract_device_id() {
  local blocks
  blocks="$(_count_symbol_in_string "$1" '[')"
  local ids
  ids="$(_element_from_string "$1" "$blocks")"
  # 10de:0de0 -> 0de0
  local parts=()
  IFS=":" read -r -a parts <<< "$ids"
  printf '%s\n' "${parts[1]:-}"
}
# $1: line from lspci -nn
# Prints the vendor ID taken from the last [...] block of the line
# (10de:0de0 -> 10de). Prints an empty line, without failing under `set -u`,
# when the line has no [...] block to take the ID from.
_extract_vendor_id() {
  local blocks
  blocks="$(_count_symbol_in_string "$1" '[')"
  local ids
  ids="$(_element_from_string "$1" "$blocks")"
  # 10de:0de0 -> 10de
  local parts=()
  IFS=":" read -r -a parts <<< "$ids"
  printf '%s\n' "${parts[0]:-}"
}
# Input to stdin: output of `lspci -nn`
# Takes no arguments.
# Outputs the lines which describe NVIDIA GPUs, unchanged and in input order.
# A last line without a trailing newline is processed too.
# Comment from ubuntu-drivers-common:
# Display controllers are device class 03
# There are 4 subclasses:
# 00 VGA compatible controller
# 01 XGA compatible controller
# 02 3D controller
# 80 Display controller
_filter_gpus() {
  local line
  local class
  # `|| [ -n "$line" ]` keeps a final line that has no trailing newline
  while read -r line || [ -n "$line" ]; do
    # the first [...] block holds class and subclass, e.g. 0300
    class="$(_element_from_string "$line" 1)"
    # See if it is a GPU
    if [ "${class:0:2}" != "03" ]; then
      continue
    fi
    # See if it is an NVIDIA GPU
    # 10de is NVIDIA's vendor ID
    # XXX It is the only vendor ID?
    if [ "$(_extract_vendor_id "$line")" = "10de" ]; then
      printf '%s\n' "$line"
    fi
  done
}
# $1: arch (e.g. x86_64)
# $2: path to file for output
# Ask dnf which packages of the given arch provide 'nvidia-blob-*' and store
# the answer in the output file: a "NAME <package>" line followed by what
# the package provides
_dnf_mk_file() {
  local arch="$1"
  local out_file="$2"
  local -a query=(
    repoquery
    --quiet
    --arch "$arch"
    --whatprovides 'nvidia-blob-*'
    --qf 'NAME %{name}\n%{provides}'
  )
  # KROKO_DNF_OPTS is split into words on purpose
  # shellcheck disable=SC2086
  dnf $KROKO_DNF_OPTS "${query[@]}" > "$out_file"
}
# $1: input file (output of _dnf_mk_file())
# $2: path to directory with temp files
# Split the input by package: every line which follows a "NAME <package>"
# line is appended to the file "$2/provides___<package>".
# Lines which come before the first "NAME" line belong to no package and are
# skipped. A last line without a trailing newline is processed too.
# Fails if the input file cannot be read.
_sort_provides_by_pkg() {
  local line
  # file of the package being processed; empty until the first NAME line
  local file=""
  local arr=()
  # `|| [ -n "$line" ]` keeps a final line that has no trailing newline
  while read -r line || [ -n "$line" ]; do
    if [[ "$line" == "NAME "* ]]; then
      arr=()
      IFS=" " read -r -a arr <<< "$line"
      file="$2"/provides___"${arr[1]}"
      continue
    fi
    if [ -z "$file" ]; then
      continue
    fi
    printf '%s\n' "$line" >> "$file"
  done < "$1"
}
# $1: device id
# $2: directory with files with provides
# output: nvidia390,nvidia470
# Prints a sorted, comma-separated list (without a trailing newline) of the
# packages whose provides___<package> file has a line starting with
# "nvidia-blob-devid(<device id>) =".
# Prints nothing and still returns 0 when no package matches, so that the
# caller can tell "no driver" from a real failure by testing for emptiness.
_get_available_drivers() {
  local f
  local drivers=()
  # Files are checked one by one instead of parsing "file:line" output of
  # grep: with a single file grep prints no file name prefix at all
  for f in "$2"/provides___*; do
    # also skips the literal pattern left when the glob matches nothing
    if [ ! -f "$f" ]; then
      continue
    fi
    if grep -q -- "^nvidia-blob-devid(${1}) =" "$f"; then
      drivers+=("${f##*/provides___}")
    fi
  done
  if [ "${#drivers[@]}" -eq 0 ]; then
    return 0
  fi
  local joined
  joined="$(printf '%s\n' "${drivers[@]}" | sort -u | tr '\n' ',')"
  # drop the comma produced from the last newline
  printf '%s' "${joined%,}"
}
# $1: device id
# $2: directory with files with provides
# output: nvidia470
# Prints the package which sorts last among the packages whose
# provides___<package> file has a line starting with
# "nvidia-blob-devid(<device id>) =".
# Prints nothing and still returns 0 when no package matches, so that the
# caller can tell "no driver" from a real failure by testing for emptiness.
_get_best_driver() {
  local f name
  local best=""
  # Files are checked one by one instead of parsing "file:line" output of
  # grep: with a single file grep prints no file name prefix at all
  for f in "$2"/provides___*; do
    # also skips the literal pattern left when the glob matches nothing
    if [ ! -f "$f" ]; then
      continue
    fi
    if ! grep -q -- "^nvidia-blob-devid(${1}) =" "$f"; then
      continue
    fi
    name="${f##*/provides___}"
    # keep the name which sorts last (string comparison)
    if [[ -z "$best" || "$name" > "$best" ]]; then
      best="$name"
    fi
  done
  if [ -n "$best" ]; then
    printf '%s\n' "$best"
  fi
}
# $1: string
# Convert string of `lspci -nn` to a human-readable name.
# The name is the text between the first "]: " (the one after [0300]) and
# the " [" which opens the last [...] block of the line.
# Example: "... [0300]: Fake Nvidia 470 [10de:1c92] (rev 01)" -> "Fake Nvidia 470"
_line2name() {
  local o="$1"
  # total number of "]" in the line
  local n
  n="$(_count_symbol_in_string "$1" "]")"
  # number of "]" passed so far
  local counter=0
  local started=0
  local human_name=""
  # loop variables are local so that callers' variables are not clobbered
  local i ch
  for (( i = 0; i < ${#o}; i++ )); do
    ch="${o:i:1}"
    if [ "$ch" = ']' ]; then
      counter=$((counter + 1))
    fi
    # nothing of interest before the first "]"
    if [ "$counter" = 0 ]; then
      continue
    fi
    if [ "$started" != 1 ]; then
      # Name starts after [0300]:
      if [ "$ch" = ' ' ] && [ "${o:i-1:1}" = ':' ] && [ "${o:i-2:1}" = ']' ]; then
        started=1
      fi
      continue
    fi
    # stop at the " [" which opens the last block (the one with the IDs)
    if [ "$ch" = ' ' ] && [ "${o:i+1:1}" = '[' ] && [ $((counter + 1)) = "$n" ]; then
      break
    fi
    human_name+="$ch"
  done
  printf '%s\n' "$human_name"
}
# Implementation of the "get-gpus" command.
# For every NVIDIA GPU found by lspci prints one line:
#   <lspci line>;<device id>;<human-readable name>;<available drivers>;<best driver>
# where <available drivers> is a comma-separated list.
# Returns:
#   $EXIT_GENERICERROR - host arch, GPU name or best driver cannot be found
#   $EXIT_ENOGPU       - there are no NVIDIA GPUs
#   $EXIT_ENODRIVER    - repositories have no driver for one of the GPUs
#   status of the failed step if querying dnf or splitting its output fails
# Failures are handled explicitly because this function is also run inside
# $(...), where bash switches errexit off, and because a helper that exits
# non-zero must lead to the diagnostics below, not to a silent exit.
_cli_get_gpus() {
  local arch
  arch="$(rpm -E "%_arch")" || arch=""
  if [ -z "$arch" ]; then
    _echo_err "Error getting architecture of the host machine"
    return "$EXIT_GENERICERROR"
  fi
  local gpus
  gpus="$(_lspci -nn | _filter_gpus)"
  if [ -z "$gpus" ]; then
    _echo_err "No NVIDIA GPUs found"
    return "$EXIT_ENOGPU"
  fi
  # Ask dnf once, then split the answer into one file per driver package
  local big_file
  big_file="$(_mktemp)" || return
  _dnf_mk_file "$arch" "$big_file" || return
  _sort_provides_by_pkg "$big_file" "$KROKO_TMPDIR" || return
  local line device_id human_name available_drivers best_driver
  while read -r line; do
    device_id="$(_extract_device_id "$line")" || device_id=""
    if [ -z "$device_id" ]; then
      # a line which cannot be parsed is reported and skipped
      _echo_err "Error extracting device ID"
      continue
    fi
    human_name="$(_line2name "$line")" || human_name=""
    if [ -z "$human_name" ]; then
      _echo_err "Error converting to human readable name"
      return "$EXIT_GENERICERROR"
    fi
    available_drivers="$(_get_available_drivers "$device_id" "$KROKO_TMPDIR")" || available_drivers=""
    if [ -z "$available_drivers" ]; then
      _echo_err "No drivers found for device $device_id"
      return "$EXIT_ENODRIVER"
    fi
    best_driver="$(_get_best_driver "$device_id" "$KROKO_TMPDIR")" || best_driver=""
    if [ -z "$best_driver" ]; then
      _echo_err "Error getting the best driver"
      return "$EXIT_GENERICERROR"
    fi
    echo "${line};${device_id};${human_name};${available_drivers};${best_driver}"
  done <<< "$gpus"
}
# If there are multiple GPUs, select a driver which will fit all of them
# Input: stdin from _cli_get_gpus(), one line per GPU with fields separated
# by ";" (the 4th field is the comma-separated list of suitable drivers)
# Output: name of the driver suitable for every GPU; if there are several,
# the one which sorts last
# Returns:
#   $EXIT_GENERICERROR   - a line has no list of drivers
#   $EXIT_ENOGPU         - input has no GPU lines at all
#   $EXIT_ENOCOMMONDRIVER - no driver is suitable for all GPUs
_select_best_driver() {
  # number of GPUs
  local n=0
  local drivers=()
  # loop variables are local so that callers' variables are not clobbered
  local line i
  local fields=()
  local names=()
  # fill array with all suggested drivers
  while read -r line || [ -n "$line" ]; do
    # `echo ""` produces one empty line: it is not a GPU
    if [ -z "$line" ]; then
      continue
    fi
    fields=()
    IFS=';' read -r -a fields <<< "$line"
    # list of drivers for this GPU
    # e.g. "nvidia390,nvidia470" or "nvidia470"
    if [ -z "${fields[3]:-}" ]; then
      _echo_err "Error getting the list of drivers"
      return "$EXIT_GENERICERROR"
    fi
    n=$((n + 1))
    names=()
    IFS=',' read -r -a names <<< "${fields[3]}"
    for (( i = 0; i < ${#names[@]}; i++ )); do
      if [ -n "${names[$i]}" ]; then
        drivers+=("${names[$i]}")
      fi
    done
  done
  if [ "$n" -eq 0 ]; then
    _echo_err "No NVIDIA GPUs found"
    return "$EXIT_ENOGPU"
  fi
  # count which one is repeated for most times:
  # top[0] is the number of GPUs the driver suits, top[1] is its name
  local top=()
  if [ "${#drivers[@]}" -gt 0 ]; then
    read -r -a top <<< "$(printf '%s\n' "${drivers[@]}" \
      | sort | uniq -c | sort -hr | head -n1)"
  fi
  # if number of times it is repeated is less than number of GPUs,
  # then there is no driver common for all GPUs
  if [ "${top[0]:-0}" -lt "$n" ]; then
    _echo_err "No common driver which will work with all GPUs"
    return "$EXIT_ENOCOMMONDRIVER"
  fi
  printf '%s\n' "${top[1]}"
}
# Implementation of the "best-driver" command: print the driver which
# fits all NVIDIA GPUs of this system
_cli_best_driver() {
  # (Re-)enable errexit inside this function; bash clears it in the subshell
  # of a command substitution, and this function is also run as $(...)
  set -e
  local gpu_table
  # Here $EXIT_* is returned if something goes wrong
  gpu_table="$(_cli_get_gpus)"
  echo "$gpu_table" | _select_best_driver
}
# Implementation of the "autoinstall" command: find the best driver and
# install it by feeding a list of commands to `dnf shell`
_cli_autoinstall() {
  # (Re-)enable errexit inside this function
  set -e
  local driver
  driver="$(_cli_best_driver)"
  local script
  script="$(_mktemp)"
  {
    # plan upgrades to ensure system consistency and avoid trying to install
    # nvidia modules for kernels which have already been removed from repos
    echo "upgrade"
    echo "autoremove"
    # install nvidia driver
    echo "install $driver"
    # In case --noautoremove equivalent is set in dnf config, override it
    # to ensure consistency and avoid odd errors
    # (proficient users can install nvidia drivers themselves)
    echo "autoremove"
    echo "transaction list"
    echo "transaction run"
  } >> "$script"
  # --allowerasing to delete e.g. nvidia510 if nvidia515 is being installed
  # Kernels for which binary nvidia modules do not exist will be removed
  # XXX Is it good...?
  # shellcheck disable=SC2086
  dnf $KROKO_DNF_OPTS -y --allowerasing shell "$script"
}
# $@: arguments of the script, used to print the command to rerun
# Check that the script is run by root (EUID 0).
# If it is not, print to stderr an explanation (in Russian if $LANG is a
# Russian UTF-8 locale, otherwise in English) and the command to rerun,
# then return $EXIT_ENOPERM.
_check_root() {
  # root is allowed to install packages, nothing to report
  if [ "$EUID" = 0 ]; then
    return 0
  fi
  # TODO: convert to gettext (*.po) when there are more translatable messages
  # For now there is only this message
  local text_en="Unable to install packages. Rerun this command from root"
  local text_ru="Невозможно установить пакеты. Перезапустите эту команду от root"
  local text="$text_en"
  if [[ "${LANG:-}" =~ ru(|_.*)".UTF-8" ]]; then
    text="$text_ru"
  fi
  _echo_err "$text":
  # suggest sudo if it is installed, otherwise show a root prompt sign
  local s='#'
  if command -v sudo >/dev/null 2>&1; then
    s=sudo
  fi
  _echo_err "$s $0 $*"
  return "$EXIT_ENOPERM"
}
# $@: command line arguments of the script
# Run the command named by $1:
#   get-gpus, best-driver, autoinstall - see _echo_help()
#   help, --help, -h - print help and return 0
#   anything else (or nothing) - print help and return 1
# Returns the status of the command which has been run.
_main() {
  case "${1:-}" in
    "get-gpus" )
      _cli_get_gpus
      ;;
    "best-driver" )
      _cli_best_driver
      ;;
    "autoinstall" )
      # Stop here explicitly if not root: relying on errexit is not enough
      # because it is ignored when _main is called from a condition or
      # from a && / || list
      _check_root "$@" || return
      _cli_autoinstall
      ;;
    "help" | "--help" | "-h" )
      _echo_help
      return 0
      ;;
    * )
      _echo_help
      return 1
      ;;
  esac
}
# Run the command line interface, unless the file is being sourced with
# SOURCING=1 (then only the functions and variables above are defined)
case "${SOURCING:-0}" in
  1)
    ;;
  *)
    _main "$@"
    ;;
esac