import os
import torch
from PIL import Image
from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM

from src.utils.singleton import Singleton
from src.modules.config import Config, config as ConfigObj
from src.modules.logger import logger as Logger


class ModelLoader:
    __metaclass__ = Singleton

    def __init__(self, conf: Config):
        #self._model = None
        #return  # TODO remove this line

        #print("Not implemented !!!")
        print(conf.model_name)

        #revision = "2024-03-04" - verry slow
        #revision = "main" #does not work
        #revision = "2024-05-08" not work
        revision = "2024-08-26"
        if conf.gpu_mode:
            self._model = AutoModelForCausalLM.from_pretrained(
                conf.model_name, trust_remote_code=True, revision=revision,
                torch_dtype=torch.bfloat16, cache_dir=conf.models_cache_dir,
                device_map={"": "cuda"}, attn_implementation="flash_attention_2"
            ).to("cuda")
        else:
            self._model = AutoModelForCausalLM.from_pretrained(
                conf.model_name, trust_remote_code=True, revision=revision,
                cache_dir=conf.models_cache_dir,
            )
        self._tokenizer = AutoTokenizer.from_pretrained(
            conf.model_name, revision=revision, cache_dir=conf.models_cache_dir
        )
        self._model.eval()

    def image_describe(self, image_path):
        image = Image.open(image_path)
        enc_image = self._model.encode_image(image)
        return self._model.answer_question(enc_image, "Describe this image.", self._tokenizer)

    def image_ask(self, image_path, question):
        image = Image.open(image_path)
        enc_image = self._model.encode_image(image)
        return self._model.answer_question(
            enc_image, question,
            self._tokenizer
        )


model_loader = ModelLoader(ConfigObj)