Skip to content

Commit cdc2cc0

Browse files
authored
detection of Intel GPU drivers corrected (#1469)
1 parent f517ee9 commit cdc2cc0

File tree

3 files changed

+195
-43
lines changed

3 files changed

+195
-43
lines changed

platform/services/installer/app/checks/resources.py

Lines changed: 63 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
import logging
99
import os
10+
import re
1011
import subprocess
1112
from subprocess import CalledProcessError, TimeoutExpired
1213

@@ -160,7 +161,33 @@ def check_gpu_driver_version(config: InstallationConfig | UpgradeConfig) -> None
160161
logger.debug("GPU driver version matched.")
161162

162163

163-
def _get_intel_gpus() -> str:
164+
def _check_intel_gpu_driver(env: dict[str, str]) -> bool:
    """
    Report whether an Intel GPU driver is visible through ``clinfo``.

    ``clinfo`` is run in a shell and its output is filtered down to the lines that
    contain both the device-name label and the word "Intel".

    env: environment variables handed to the spawned shell.

    Returns True when the filtered output contains the device-name label. Returns
    False when it does not, or when the command exits non-zero, times out or
    cannot be started.
    """
    device_label = ResourcesChecksTexts.intel_gpu_arc_device_name
    probe = f'clinfo|grep "{device_label}"|grep Intel'
    logger.debug(f"Getting the list of Intel GPU drivers with {probe}")

    try:
        raw_output = subprocess.check_output(  # noqa: S602 # nosec: B602
            probe,
            stderr=subprocess.STDOUT,
            shell=True,
            timeout=5,
            env=env,
        )
    except (CalledProcessError, TimeoutExpired, FileNotFoundError) as err:
        # A failed probe is treated the same as "no driver installed".
        logger.debug(f"Checking the installed Intel GPU driver failed with {err}")
        return False

    listing = raw_output.decode("utf-8")
    logger.debug(listing)
    return device_label in listing
188+
189+
190+
def _get_intel_gpus() -> tuple[str, bool]: # noqa: C901
164191
"""
165192
MAX cards:
166193
Attempt to get Intel GPUs with xpu-smi
@@ -182,33 +209,50 @@ def _get_intel_gpus() -> str:
182209
logger.debug(xpu_output)
183210
if ResourcesChecksTexts.intel_gpu_no_devices in xpu_output:
184211
logger.debug("No devices")
185-
return ""
212+
return "", True
186213
if ResourcesChecksTexts.intel_gpu_max_card in xpu_output:
187214
logger.debug("Max 1100 found")
188-
return xpu_output
215+
return GPU_PROVIDER_INTEL_MAX, True
189216
except (CalledProcessError, TimeoutExpired, FileNotFoundError) as err:
190217
logger.debug(f"Getting the list of Intel GPU failed with {err}")
191218

192219
# Only valid for ARC cards
220+
if not _check_intel_gpu_driver(env):
221+
return "", False
222+
193223
try:
194-
command = 'clinfo|grep "' + ResourcesChecksTexts.intel_gpu_arc_device_name + '"|grep Intel'
224+
command = "lspci -nnk | grep -iA3 'VGA\|3D\|Display'"
195225
logger.debug(f"Getting the list of Intel ARC with {command}")
196226

197-
clinfo_output = subprocess.check_output( # noqa: S602 # nosec: B602
227+
lspci_output = subprocess.check_output( # noqa: S602 # nosec: B602
198228
command,
199229
stderr=subprocess.STDOUT,
200230
shell=True,
201231
timeout=5,
202232
env=env,
203233
).decode("utf-8")
204-
logger.debug(clinfo_output)
205-
if ResourcesChecksTexts.intel_gpu_arc_device_name in clinfo_output:
206-
logger.debug("ARC found")
207-
return clinfo_output
234+
driver = ""
235+
cards = lspci_output.split("--\n")
236+
for card in cards:
237+
drivers = re.findall(r"Kernel driver in use:\s*([^\s]+)", card)
238+
239+
if ResourcesChecksTexts.intel_gpu_i915_driver in drivers:
240+
driver = GPU_PROVIDER_INTEL_ARC_A
241+
elif ResourcesChecksTexts.intel_gpu_xe_driver in drivers:
242+
driver = GPU_PROVIDER_INTEL_ARC
243+
244+
if driver:
245+
first_line = card.split("\n")[0]
246+
if "Intel Corporation Device" in first_line:
247+
logger.debug(f"Intel dGPU found: {driver}")
248+
return driver, True
249+
250+
logger.debug(f"Intel iGPU found: {driver}")
251+
return driver, False
208252
except (CalledProcessError, TimeoutExpired, FileNotFoundError) as err:
209253
logger.debug(f"Getting the list of Intel ARC failed with {err}")
210254

211-
return ""
255+
return "", False
212256

213257

214258
def _get_nvidia_gpus():
@@ -258,22 +302,17 @@ def check_local_gpu(config: InstallationConfig): # noqa: ANN201
258302

259303
# Intel GPUs are always probed, regardless of whether Nvidia GPUs were found.
260304
# Selection priority: Intel dGPU first, then Nvidia, then Intel iGPU as a fallback.
261-
intel_gpus = _get_intel_gpus()
262-
if not intel_gpus and not nvidia_gpus:
305+
intel_gpu, isdGPU = _get_intel_gpus()
306+
if not intel_gpu and not nvidia_gpus:
263307
raise ResourcesCheckWarning(ResourcesChecksTexts.gpu_requirements_check_error)
264-
if intel_gpus:
265-
if ResourcesChecksTexts.intel_gpu_max_card in intel_gpus:
266-
config.gpu_provider.value = GPU_PROVIDER_INTEL_MAX
267-
elif ResourcesChecksTexts.intel_gpu_arc_a_card in intel_gpus:
268-
config.gpu_provider.value = GPU_PROVIDER_INTEL_ARC_A
269-
else:
270-
config.gpu_provider.value = GPU_PROVIDER_INTEL_ARC
271-
logger.info(f"GPU provider: {config.gpu_provider.value}")
308+
if intel_gpu and isdGPU:
309+
config.gpu_provider.value = intel_gpu
310+
logger.info(f"GPU provider (Intel dGPU): {config.gpu_provider.value}")
272311
elif nvidia_gpus:
273312
config.gpu_provider.value = GPU_PROVIDER_NVIDIA
274313
logger.info(f"GPU provider: {config.gpu_provider.value}")
275314
found_gpus = [f"{local_gpu['name']}, mem={str(local_gpu['memory_total'])}MiB" for local_gpu in nvidia_gpus]
276-
logger.debug(f"Found GPUs: {', '.join(found_gpus)}")
315+
logger.debug(f"Found nVidia GPUs: {', '.join(found_gpus)}")
277316

278317
unsupported_gpus = [gpu for gpu in nvidia_gpus if gpu["memory_total"] < SUPPORTED_GPUS_MEMORY]
279318
if unsupported_gpus:
@@ -282,6 +321,9 @@ def check_local_gpu(config: InstallationConfig): # noqa: ANN201
282321
raise UnsupportedGpuWarning(
283322
ResourcesChecksTexts.gpu_requirements_check_memory.format(gpus=unsupported_gpus_str)
284323
)
324+
elif intel_gpu:
325+
config.gpu_provider.value = intel_gpu
326+
logger.info(f"GPU provider (Intel iGPU): {config.gpu_provider.value}")
285327

286328

287329
def check_local_mem(): # noqa: ANN201

platform/services/installer/app/texts/checks.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,4 +214,5 @@ class ResourcesChecksTexts:
214214
intel_gpu_no_devices = "No device discovered"
215215
intel_gpu_max_card = "Data Center GPU Max 1100"
216216
intel_gpu_arc_device_name = "Device Name"
217-
intel_gpu_arc_a_card = "Arc(TM) A"
217+
intel_gpu_i915_driver = "i915"
218+
intel_gpu_xe_driver = "xe"

platform/services/installer/tests/unit/checks/test_resources_checks.py

Lines changed: 130 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,11 @@
1616
UnsupportedGpuWarning,
1717
)
1818
from checks.resources import (
19+
GPU_PROVIDER_INTEL_ARC,
20+
GPU_PROVIDER_INTEL_ARC_A,
21+
GPU_PROVIDER_INTEL_MAX,
1922
SUPPORTED_NVIDIA_DRIVER_VERSION,
23+
_get_intel_gpus,
2024
check_gpu_driver_version,
2125
check_local_cpu,
2226
check_local_disk,
@@ -29,6 +33,34 @@
2933
from configuration_models.upgrade_config import UpgradeConfig
3034
from texts.checks import ResourcesChecksTexts
3135

36+
arc_xe_description = """ 03:00.0 Display controller [0380]: Intel Corporation Device [8086:e216]
37+
Subsystem: Intel Corporation Device [8086:1500]
38+
Kernel driver in use: xe
39+
Kernel modules: xe"""
40+
41+
arc_i915_description = """ 03:00.0 Display controller [0380]: Intel Corporation Device [8086:e216]
42+
Subsystem: Intel Corporation Device [8086:1500]
43+
Kernel driver in use: i915
44+
Kernel modules: i915"""
45+
46+
igpu_description = """00:02.0 VGA compatible controller [0300]: Intel Corporation Raptor Lake-S GT1 [UHD Graphics 770] [8086:a780] (rev 04)
47+
DeviceName: Onboard IGD
48+
Subsystem: ASUSTeK Computer Inc. Raptor Lake-S GT1 [UHD Graphics 770] [1043:8882]
49+
Kernel driver in use: i915"""
50+
51+
nvidia_description = """08:00.0 VGA compatible controller [0300]: NVIDIA Corporation GA102 [GeForce RTX 3090] [10de:2204] (rev a1)
52+
Subsystem: Gigabyte Technology Co., Ltd GA102 [GeForce RTX 3090] [1458:4043]
53+
Kernel driver in use: nouveau
54+
Kernel modules: nvidiafb, nouveau"""
55+
56+
arc_xe_igpu_description = arc_xe_description + "\n--\n" + igpu_description
57+
58+
arc_i915_igpu_description = arc_i915_description + "\n--\n" + igpu_description
59+
60+
arc_xe_nvidia_description = arc_xe_description + "\n--\n" + nvidia_description
61+
62+
nvidia_igpu_description = nvidia_description + "\n--\n" + igpu_description
63+
3264

3365
def test_check_local_cpu(mocker):
3466
"""Check if the requirement for 12 physical cores passes successfully"""
@@ -91,36 +123,113 @@ def test_check_local_nvidia_gpu_ok(get_gpus_mock):
91123
assert install_config_mock.gpu_provider.value == "nvidia"
92124

93125

94-
def test_check_local_intel_gpu_ok(get_gpus_mock, get_intel_gpus_mock):
95-
get_gpus_mock.return_value = []
96-
get_intel_gpus_mock.return_value = "Device Name: Intel(R) Data Center GPU Max 1100"
126+
def test_get_intel_gpus_max_card(mocker):
    """A Max card in the first probe's output yields the Intel Max provider, flagged as a dGPU."""
    sub_process_mock = mocker.patch(
        "subprocess.check_output", return_value=ResourcesChecksTexts.intel_gpu_max_card.encode("utf-8")
    )

    provider, is_dgpu = _get_intel_gpus()

    # Exact equality: a substring check would also accept any longer provider string.
    assert provider == GPU_PROVIDER_INTEL_MAX
    assert is_dgpu is True
    # The Max card is recognised on the first probe, so no further command is run.
    assert sub_process_mock.call_count == 1
134+
135+
136+
def test_get_intel_gpus_arc_xe_card(mocker):
    """An Intel discrete card bound to the ``xe`` kernel driver maps to GPU_PROVIDER_INTEL_ARC."""
    sub_process_mock = mocker.patch("subprocess.check_output", return_value=arc_xe_description.encode("utf-8"))
    check_intel_gpu_driver_mock = mocker.patch("checks.resources._check_intel_gpu_driver", return_value=True)

    provider, is_dgpu = _get_intel_gpus()

    # Exact equality: a substring check cannot tell this provider apart from a
    # longer provider string that merely contains it.
    assert provider == GPU_PROVIDER_INTEL_ARC
    assert is_dgpu is True
    assert check_intel_gpu_driver_mock.call_count == 1
    # Two check_output calls; the driver probe itself is mocked out above.
    assert sub_process_mock.call_count == 2
146+
147+
148+
def test_get_intel_gpus_arc_i915_card(mocker):
    """An Intel discrete card bound to the ``i915`` kernel driver maps to GPU_PROVIDER_INTEL_ARC_A."""
    sub_process_mock = mocker.patch("subprocess.check_output", return_value=arc_i915_description.encode("utf-8"))
    check_intel_gpu_driver_mock = mocker.patch("checks.resources._check_intel_gpu_driver", return_value=True)

    provider, is_dgpu = _get_intel_gpus()

    # Exact equality instead of substring containment.
    assert provider == GPU_PROVIDER_INTEL_ARC_A
    assert is_dgpu is True
    assert check_intel_gpu_driver_mock.call_count == 1
    # Two check_output calls; the driver probe itself is mocked out above.
    assert sub_process_mock.call_count == 2
158+
159+
160+
def test_get_intel_gpus_arc_igpu_card(mocker):
    """With a discrete i915 card listed ahead of an iGPU, the discrete card is the one reported."""
    sub_process_mock = mocker.patch("subprocess.check_output", return_value=arc_i915_igpu_description.encode("utf-8"))
    check_intel_gpu_driver_mock = mocker.patch("checks.resources._check_intel_gpu_driver", return_value=True)

    provider, is_dgpu = _get_intel_gpus()

    # Exact equality instead of substring containment.
    assert provider == GPU_PROVIDER_INTEL_ARC_A
    assert is_dgpu is True
    assert check_intel_gpu_driver_mock.call_count == 1
    # Two check_output calls; the driver probe itself is mocked out above.
    assert sub_process_mock.call_count == 2
170+
171+
172+
def test_get_intel_gpus_igpu_card(mocker):
    """A lone integrated GPU on the i915 driver is reported with the dGPU flag cleared."""
    sub_process_mock = mocker.patch("subprocess.check_output", return_value=igpu_description.encode("utf-8"))
    check_intel_gpu_driver_mock = mocker.patch("checks.resources._check_intel_gpu_driver", return_value=True)

    provider, is_dgpu = _get_intel_gpus()

    # Exact equality instead of substring containment.
    assert provider == GPU_PROVIDER_INTEL_ARC_A
    assert is_dgpu is False
    assert check_intel_gpu_driver_mock.call_count == 1
    # Two check_output calls; the driver probe itself is mocked out above.
    assert sub_process_mock.call_count == 2
182+
183+
184+
def test_check_local_nvidia_arc(mocker):
185+
get_intel_mock = mocker.patch("checks.resources._get_intel_gpus", return_value=(GPU_PROVIDER_INTEL_ARC, True))
186+
get_nvidia_mock = mocker.patch(
187+
"checks.resources._get_nvidia_gpus",
188+
return_value=[
189+
{
190+
"name": "NVIDIA GeForce RTX 3090",
191+
"memory_total": 24576,
192+
}
193+
],
194+
)
195+
97196
install_config_mock = InstallationConfig(interactive_mode=False, install_telemetry_stack=False)
98197
install_config_mock.gpu_support.value = True
99198
check_local_gpu(config=install_config_mock)
100-
assert get_gpus_mock.call_count == 1
101-
assert get_intel_gpus_mock.call_count == 1
102-
assert install_config_mock.gpu_provider.value == "intel-max"
103-
199+
assert get_intel_mock.call_count == 1
200+
assert get_nvidia_mock.call_count == 1
201+
assert install_config_mock.gpu_provider.value == GPU_PROVIDER_INTEL_ARC
202+
203+
204+
def test_check_local_nvidia_igpu(mocker):
205+
get_intel_mock = mocker.patch("checks.resources._get_intel_gpus", return_value=(GPU_PROVIDER_INTEL_ARC, False))
206+
get_nvidia_mock = mocker.patch(
207+
"checks.resources._get_nvidia_gpus",
208+
return_value=(
209+
[
210+
{
211+
"name": "NVIDIA GeForce RTX 3090",
212+
"memory_total": 24576,
213+
}
214+
]
215+
),
216+
)
104217

105-
def test_check_local_intel_gpu_arc_ok(get_gpus_mock, get_intel_gpus_mock):
106-
get_gpus_mock.return_value = []
107-
get_intel_gpus_mock.return_value = "Device Name Intel(R) Graphics"
108218
install_config_mock = InstallationConfig(interactive_mode=False, install_telemetry_stack=False)
109219
install_config_mock.gpu_support.value = True
110220
check_local_gpu(config=install_config_mock)
111-
assert get_gpus_mock.call_count == 1
112-
assert get_intel_gpus_mock.call_count == 1
113-
assert install_config_mock.gpu_provider.value == "intel-arc"
221+
assert get_intel_mock.call_count == 1
222+
assert get_nvidia_mock.call_count == 1
223+
assert install_config_mock.gpu_provider.value == "nvidia"
114224

115225

116-
def test_check_local_gpu_not_found(get_gpus_mock, get_intel_gpus_mock):
117-
get_gpus_mock.return_value = []
118-
get_intel_gpus_mock.return_value = ""
119-
with pytest.raises(ResourcesCheckWarning):
120-
install_config_mock = InstallationConfig(interactive_mode=False, install_telemetry_stack=False)
121-
check_local_gpu(config=install_config_mock)
122-
assert get_gpus_mock.call_count == 1
123-
assert get_intel_gpus_mock.call_count == 1
226+
def test_get_intel_gpus_no_card(mocker):
    """Output that mentions no Intel device yields an empty provider and a cleared dGPU flag."""
    sub_process_mock = mocker.patch("subprocess.check_output", return_value=b"lack of Intel gpu")

    provider, is_dgpu = _get_intel_gpus()

    # Pin both members of the returned tuple, not just the falsiness of the first.
    assert provider == ""
    assert is_dgpu is False
    # The driver probe is not mocked here, so it accounts for the second check_output call.
    assert sub_process_mock.call_count == 2
124233

125234

126235
def test_check_local_gpu_not_supported(get_gpus_mock):

0 commit comments

Comments
 (0)