feat(ml): rocm ()

* feat(ml): introduce support of onnxruntime-rocm for AMD GPU

* try mutex for algo cache

use OrtMutex

* bump versions, run on mich

use 3.12

use 1.19.2

* acquire lock before any changes can be made

guard algo benchmark results

mark mutex as mutable

re-add /bin/sh (?)

use 3.10

use 6.1.2

* use composite cache key

1.19.2

fix variable name

fix variable reference

aaaaaaaaaaaaaaaaaaaa

* bump deps

* disable algo caching

* fix gha

* try ubuntu runner

* actually fix the gha

* update patch

* skip mimalloc preload for rocm

* increase build threads

* increase timeout for rocm

* Revert "increase timeout for rocm"

This reverts commit 2c4452f5d132198ed381a7b262b4a5cab5114b5f.

* attempt migraphx

* set migraphx_home

* Revert "set migraphx_home"

This reverts commit c121d3e48754b3bce100636f8d666deec58a44b7.

* Revert "attempt migraphx"

This reverts commit 521f9fb72dbe506dc6cb8faeb6494817d87265c6.

* migraphx, take two

* bump rocm

* allow cpu

* try only targeting migraphx

* skip tests

* migraph 

* known issues

* target gfx900 and gfx1102

* mention `HSA_USE_SVM`

* update lock

* set device id for rocm

---------

Co-authored-by: Mehdi GHESH <mehdi.ghesh@hotmail.fr>
This commit is contained in:
Mert 2025-03-17 17:08:19 -04:00 committed by GitHub
commit 2b37caba03
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 340 additions and 50 deletions
machine-learning/app

View file

@ -180,6 +180,7 @@ class TestOrtSession:
OV_EP = ["OpenVINOExecutionProvider", "CPUExecutionProvider"]
CUDA_EP_OUT_OF_ORDER = ["CPUExecutionProvider", "CUDAExecutionProvider"]
TRT_EP = ["TensorrtExecutionProvider", "CUDAExecutionProvider", "CPUExecutionProvider"]
ROCM_EP = ["ROCMExecutionProvider", "CPUExecutionProvider"]
@pytest.mark.providers(CPU_EP)
def test_sets_cpu_provider(self, providers: list[str]) -> None:
@ -219,6 +220,12 @@ class TestOrtSession:
assert session.providers == self.CUDA_EP
@pytest.mark.providers(ROCM_EP)
def test_uses_rocm(self, providers: list[str]) -> None:
session = OrtSession("ViT-B-32__openai")
assert session.providers == self.ROCM_EP
def test_sets_provider_kwarg(self) -> None:
providers = ["CUDAExecutionProvider"]
session = OrtSession("ViT-B-32__openai", providers=providers)
@ -235,19 +242,33 @@ class TestOrtSession:
{"arena_extend_strategy": "kSameAsRequested"},
]
def test_sets_device_id_for_openvino(self) -> None:
def test_sets_provider_options_for_openvino(self) -> None:
model_path = "/cache/ViT-B-32__openai/textual/model.onnx"
os.environ["MACHINE_LEARNING_DEVICE_ID"] = "1"
session = OrtSession("ViT-B-32__openai", providers=["OpenVINOExecutionProvider"])
session = OrtSession(model_path, providers=["OpenVINOExecutionProvider"])
assert session.provider_options[0]["device_type"] == "GPU.1"
assert session.provider_options == [
{
"device_type": "GPU.1",
"precision": "FP32",
"cache_dir": "/cache/ViT-B-32__openai/textual/openvino",
}
]
def test_sets_device_id_for_cuda(self) -> None:
def test_sets_provider_options_for_cuda(self) -> None:
os.environ["MACHINE_LEARNING_DEVICE_ID"] = "1"
session = OrtSession("ViT-B-32__openai", providers=["CUDAExecutionProvider"])
assert session.provider_options[0]["device_id"] == "1"
assert session.provider_options == [{"arena_extend_strategy": "kSameAsRequested", "device_id": "1"}]
def test_sets_provider_options_for_rocm(self) -> None:
os.environ["MACHINE_LEARNING_DEVICE_ID"] = "1"
session = OrtSession("ViT-B-32__openai", providers=["ROCMExecutionProvider"])
assert session.provider_options == [{"arena_extend_strategy": "kSameAsRequested", "device_id": "1"}]
def test_sets_provider_options_kwarg(self) -> None:
session = OrtSession(