feat(ml): export clip models to ONNX and host models on Hugging Face (#4700)
* export clip models
* export to hf
* refactored export code
* export mclip, general refactoring
* cleanup
* updated conda deps
* do transforms with pillow and numpy, add tokenization config to export, general refactoring
* moved conda dockerfile, re-added poetry
* minor fixes
* updated link
* updated tests
* removed `requirements.txt` from workflow
* fixed mimalloc path
* removed torchvision
* cleaner np typing
* review suggestions
* update default model name
* update test
parent 3212a47720
commit 87a0ba3db3

29 changed files with 6192 additions and 2043 deletions
machine-learning/app

@@ -1,5 +1,6 @@
 import json
-from typing import Any, Iterator, TypeAlias
+from pathlib import Path
+from typing import Any, Iterator
 from unittest import mock
 
 import numpy as np
@@ -8,8 +9,7 @@ from fastapi.testclient import TestClient
 from PIL import Image
 
 from .main import app, init_state
-
-ndarray: TypeAlias = np.ndarray[int, np.dtype[np.float32]]
+from .schemas import ndarray_f32
 
 
 @pytest.fixture
@@ -18,13 +18,13 @@ def pil_image() -> Image.Image:
 
 
 @pytest.fixture
-def cv_image(pil_image: Image.Image) -> ndarray:
+def cv_image(pil_image: Image.Image) -> ndarray_f32:
     return np.asarray(pil_image)[:, :, ::-1]  # PIL uses RGB while cv2 uses BGR
 
 
 @pytest.fixture
 def mock_get_model() -> Iterator[mock.Mock]:
-    with mock.patch("app.models.cache.InferenceModel.from_model_type", autospec=True) as mocked:
+    with mock.patch("app.models.cache.from_model_type", autospec=True) as mocked:
         yield mocked
 
 
@@ -37,3 +37,25 @@ def deployed_app() -> TestClient:
 @pytest.fixture(scope="session")
 def responses() -> dict[str, Any]:
     return json.load(open("responses.json", "r"))
+
+
+@pytest.fixture(scope="session")
+def clip_model_cfg() -> dict[str, Any]:
+    return {
+        "embed_dim": 512,
+        "vision_cfg": {"image_size": 224, "layers": 12, "width": 768, "patch_size": 32},
+        "text_cfg": {"context_length": 77, "vocab_size": 49408, "width": 512, "heads": 8, "layers": 12},
+    }
+
+
+@pytest.fixture(scope="session")
+def clip_preprocess_cfg() -> dict[str, Any]:
+    return {
+        "size": [224, 224],
+        "mode": "RGB",
+        "mean": [0.48145466, 0.4578275, 0.40821073],
+        "std": [0.26862954, 0.26130258, 0.27577711],
+        "interpolation": "bicubic",
+        "resize_mode": "shortest",
+        "fill_color": 0,
+    }
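The new clip_preprocess_cfg fixture corresponds to the "do transforms with pillow and numpy" item in the commit message. A rough sketch of how such a config could drive preprocessing, assuming bicubic shortest-side resizing followed by a center crop; the preprocess function below is illustrative, not the app's actual implementation:

# Illustrative Pillow + NumPy preprocessing driven by a clip_preprocess_cfg-style
# dict: resize the shortest side, center-crop, scale to [0, 1], then normalize.
import numpy as np
from PIL import Image

def preprocess(image: Image.Image, cfg: dict) -> np.ndarray:
    size = cfg["size"][0]
    image = image.convert(cfg["mode"])

    # Resize so the shortest side matches the target, preserving aspect ratio.
    scale = size / min(image.size)
    image = image.resize(
        (round(image.width * scale), round(image.height * scale)),
        Image.Resampling.BICUBIC,
    )

    # Center-crop to size x size.
    left = (image.width - size) // 2
    top = (image.height - size) // 2
    image = image.crop((left, top, left + size, top + size))

    # HWC uint8 -> normalized CHW float32, using the fixture's mean/std.
    pixels = np.asarray(image, dtype=np.float32) / 255.0
    pixels = (pixels - np.array(cfg["mean"], dtype=np.float32)) / np.array(cfg["std"], dtype=np.float32)
    return np.expand_dims(pixels.transpose(2, 0, 1), 0)  # add batch dimension

The mean/std values in the fixture are the standard OpenAI CLIP normalization constants, which is why the tests pin them exactly.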
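Since this commit moves model hosting to Hugging Face, fetching an exported model would look roughly like the following; the repo id is an assumed example under the immich-app organization and is not confirmed by this diff:

# Hypothetical download of an exported model from Hugging Face.
# The repo id below is an assumed example, not taken from this commit.
from huggingface_hub import snapshot_download

model_dir = snapshot_download("immich-app/ViT-B-32__openai")
print(model_dir)  # local cache directory containing the exported files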