import os
import requests
import logging
from typing import List, Dict, Any, Optional, Union
from functools import lru_cache
import time
import random
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed
# Module-level logger, named after this module, shared by everything below.
logger = logging.getLogger(__name__)
def _parse_keep_alive(value: Optional[Union[str, int, float]]) -> Union[str, int, float]:

    default = '12h'
    if value is None:
        return default
    value_str = str(value).strip()
    if not value_str:
        return default
    if value_str.isdigit():
        return int(value_str)
    try:
        float_val = float(value_str)
        return float_val
    except ValueError:
        pass
    return value_str
class EmbeddingService:
    """Client that creates text embeddings through an Ollama server.

    Configuration is read from environment variables in ``__init__``:
    ``OLLAMA_BASE_URL``, ``EMBEDDING_MODEL``, ``EMBEDDING_MAX_RETRIES``,
    ``EMBEDDING_RETRY_DELAY``, ``OLLAMA_KEEP_ALIVE`` and
    ``EMBEDDING_MAX_WORKERS``. The model name is re-resolved on every
    request (environment first, then ``/opt/autogen/settings.json``, then a
    built-in default).

    When a request ultimately fails, the public methods return a zero
    vector of length ``self.dimension`` instead of raising.
    """

    # Expected embedding length per known model name. Models that are not
    # listed are assumed to produce _DEFAULT_DIMENSION values.
    _MODEL_DIMENSIONS: Dict[str, int] = {
        'nomic-embed-text:latest': 768,
        'mxbai-embed-large:latest': 1024,
        'all-minilm:latest': 384,
        'all-minilm:l6-v2': 384,
        'sentence-transformers/all-MiniLM-L6-v2': 384,
        'jeffh/intfloat-multilingual-e5-large-instruct:q8_0': 1024,
        'intfloat/multilingual-e5-large-instruct': 1024,
        'intfloat/multilingual-e5-base': 768,
        'intfloat/multilingual-e5-small': 384,
    }
    _DEFAULT_DIMENSION = 1024

    class _EmbeddingUnavailable(Exception):
        """Internal signal that no embedding could be obtained.

        Raised inside the cached code path so that ``lru_cache`` does not
        store a failure (it only caches returned values).
        """

    def __init__(self):
        """Read configuration from the environment and set up per-instance state."""
        base_url = os.getenv('OLLAMA_BASE_URL', '').strip().rstrip('/')
        if ',' in base_url:
            # Only a single server is supported: keep the first entry.
            logger.warning(f"OLLAMA_BASE_URL содержит несколько URL, берем только первый: {base_url}")
            base_url = base_url.split(',')[0].strip().rstrip('/')
        self.ollama_url = base_url
        self.ollama_urls = [self.ollama_url] if self.ollama_url else []
        self._default_embedding_model = 'jeffh/intfloat-multilingual-e5-large-instruct:q8_0'
        self.embedding_model = self._get_embedding_model()
        # (connect timeout, read timeout) in seconds, as accepted by requests.
        self.timeout = (10, 60)
        self.max_retries = int(os.getenv('EMBEDDING_MAX_RETRIES', '3'))
        self.retry_delay = float(os.getenv('EMBEDDING_RETRY_DELAY', '1.0'))
        self.keep_alive = _parse_keep_alive(os.getenv('OLLAMA_KEEP_ALIVE', '12h'))
        # ollama_urls holds at most one entry, so a single log line suffices.
        logger.info(f"Embedding Service инициализирован с Ollama: {self.ollama_url}")
        self.max_workers = int(os.getenv('EMBEDDING_MAX_WORKERS', '8'))
        # Kept as a public attribute; requests made by this class use a
        # short-lived session per call because create_embeddings_batch runs
        # create_embedding from several worker threads.
        self.session = requests.Session()
        self.session.headers.update({'Content-Type': 'application/json'})
        self.dimension = self._get_model_dimension()
        # Per-instance LRU cache: decorating the method itself would key the
        # cache on ``self`` at class level and keep instances alive.
        self._cached_embedding = lru_cache(maxsize=1000)(self._embed_or_raise)

    def _get_embedding_model(self) -> str:
        """Return the embedding model name to use right now.

        Resolution order: the ``EMBEDDING_MODEL`` environment variable, the
        ``embedding_model`` key of ``/opt/autogen/settings.json`` (if that
        file exists and can be read), then the built-in default.
        """
        model = os.getenv('EMBEDDING_MODEL')
        if model:
            return model
        try:
            settings_file = '/opt/autogen/settings.json'
            if os.path.exists(settings_file):
                import json
                with open(settings_file, 'r', encoding='utf-8') as f:
                    settings = json.load(f)
                model = settings.get('embedding_model')
                if model:
                    return model
        except Exception as e:
            # Best effort: any problem with the settings file falls back to
            # the default model.
            logger.debug(f"Не удалось загрузить модель из settings.json: {e}")
        return self._default_embedding_model

    def _get_model_dimension(self) -> int:
        """Return the expected embedding length for the current model and log it.

        Unknown models are assumed to have ``_DEFAULT_DIMENSION`` values.
        Also logs the configured ``keep_alive`` value.
        """
        current_model = self._get_embedding_model()
        dimension = self._MODEL_DIMENSIONS.get(current_model, self._DEFAULT_DIMENSION)
        logger.info(f"Размерность модели {current_model}: {dimension}")
        logger.info(f"Ollama keep_alive для embedding service: {self.keep_alive!r}")
        return dimension

    def _request_embedding(self, text: str, retries: Optional[int] = None) -> Optional[List[float]]:
        """POST ``text`` to ``/api/embeddings`` and return the embedding.

        Args:
            text: Text to embed.
            retries: Number of retries after the first attempt; defaults to
                ``self.max_retries``.

        Returns:
            The ``embedding`` list from the first HTTP 200 response (an
            empty list if the response has no such key), or ``None`` when
            every attempt failed or no base URL is configured. A length
            that differs from the expected dimension is logged as a warning
            but the embedding is still returned.
        """
        if retries is None:
            retries = self.max_retries
        if not self.ollama_url:
            # Without a base URL every attempt would fail on an invalid URL;
            # skip the pointless retry/backoff cycle.
            logger.error("OLLAMA_BASE_URL не задан, запрос эмбеддинга невозможен")
            return None

        current_model = self._get_embedding_model()
        # Plain table lookup: avoids re-reading settings and the INFO log
        # lines of _get_model_dimension() on every single request.
        expected_dimension = self._MODEL_DIMENSIONS.get(current_model, self._DEFAULT_DIMENSION)
        embedding_url = f"{self.ollama_url}/api/embeddings"
        payload = {
            "model": current_model,
            "prompt": text,
            "keep_alive": self.keep_alive,
        }
        logger.debug(f"Запрос эмбеддинга к: {embedding_url}, модель: {current_model}")

        # One session for all attempts of this call, closed exactly once.
        with requests.Session() as session:
            session.headers.update({'Content-Type': 'application/json'})
            for attempt in range(retries + 1):
                exponential_backoff = True
                try:
                    response = session.post(embedding_url, json=payload, timeout=self.timeout)
                    if response.status_code == 200:
                        embedding = response.json().get('embedding', [])
                        if len(embedding) == expected_dimension:
                            logger.debug(f"Создан эмбеддинг размерности {len(embedding)} для модели {current_model}: {text[:50]}...")
                        else:
                            logger.warning(f"Неожиданная размерность эмбеддинга: {len(embedding)}, ожидалось: {expected_dimension} для модели {current_model}")
                        return embedding
                    logger.error(f"Ошибка создания эмбеддинга (попытка {attempt + 1}): {response.status_code} - {response.text[:200]}")
                    if response.status_code == 405:
                        error_info = f"Status: {response.status_code}, URL: {embedding_url}, Headers: {dict(response.headers)}, Response: {response.text[:300]}"
                        logger.error(f"Детали ошибки 405: {error_info}")
                except requests.exceptions.ConnectTimeout as e:
                    logger.error(f"Таймаут подключения к Ollama (попытка {attempt + 1}): {e}")
                    # Connection timeouts wait a fixed delay between attempts.
                    exponential_backoff = False
                except requests.exceptions.Timeout as e:
                    logger.error(f"Таймаут чтения ответа от Ollama (попытка {attempt + 1}): {e}")
                except requests.exceptions.RequestException as e:
                    logger.error(f"Ошибка запроса к Ollama (попытка {attempt + 1}): {e}")
                except Exception as e:
                    # Also covers a 200 response whose body is not the
                    # expected JSON object.
                    logger.error(f"Неожиданная ошибка при запросе к Ollama (попытка {attempt + 1}): {e}")

                # Reached only after a failed attempt. Wait before retrying,
                # including after non-200 responses, so a struggling server
                # is not hit again immediately.
                if attempt < retries:
                    delay = self.retry_delay
                    if exponential_backoff:
                        delay *= 2 ** attempt
                    time.sleep(delay)

        logger.error(f"Не удалось создать эмбеддинг после {retries + 1} попыток, возвращаем нулевой вектор")
        return None

    def create_embedding(self, text: str, retries: Optional[int] = None) -> List[float]:
        """Create an embedding for ``text``.

        Args:
            text: Text to embed.
            retries: Number of retries after the first attempt; defaults to
                ``self.max_retries``.

        Returns:
            The embedding returned by Ollama, or a zero vector of length
            ``self.dimension`` if all attempts failed.
        """
        embedding = self._request_embedding(text, retries)
        if embedding is None:
            return [0.0] * self.dimension
        return embedding

    def create_embeddings_batch(self, texts: List[str], batch_size: int = None) -> List[List[float]]:
        """Create embeddings for many texts using a thread pool.

        Args:
            texts: Texts to embed.
            batch_size: How many texts are submitted to the pool at a time;
                defaults to the ``EMBEDDING_BATCH_SIZE`` environment
                variable (20 if unset). Values below 1 are treated as 1.

        Returns:
            One embedding per input text, in input order. A text whose
            worker raised gets a zero vector of length ``self.dimension``.
        """
        if not texts:
            return []
        if batch_size is None:
            batch_size = int(os.getenv('EMBEDDING_BATCH_SIZE', '20'))
        if batch_size < 1:
            # range() rejects a zero step and a negative step would skip
            # every text, leaving None placeholders in the result.
            logger.warning(f"Некорректный batch_size={batch_size}, используем 1")
            batch_size = 1
        # ThreadPoolExecutor requires at least one worker.
        workers = max(1, self.max_workers)
        total = len(texts)
        embeddings = [None] * total
        logger.info(f"Создаем {total} эмбеддингов параллельно (workers={workers}, batch_size={batch_size})...")
        # A single pool serves all batches; each batch is still fully
        # collected before the next one is submitted.
        with ThreadPoolExecutor(max_workers=workers) as executor:
            for start in range(0, total, batch_size):
                batch = texts[start:start + batch_size]
                logger.debug(f"Обрабатываем батч {start // batch_size + 1}, размер: {len(batch)}")
                future_to_index = {
                    executor.submit(self.create_embedding, text): idx
                    for idx, text in enumerate(batch, start=start)
                }
                for future in as_completed(future_to_index):
                    idx = future_to_index[future]
                    try:
                        embeddings[idx] = future.result()
                    except Exception as e:
                        logger.error(f"Ошибка создания эмбеддинга для текста {idx}: {e}")
                        embeddings[idx] = [0.0] * self.dimension
        logger.info(f"Создано {len(embeddings)} эмбеддингов для {total} текстов")
        return embeddings

    def get_embedding_cached(self, text: str) -> List[float]:
        """Return the embedding for ``text``, using the per-instance LRU cache.

        Failed requests are not cached, so a later call for the same text
        tries the server again. On failure a zero vector of length
        ``self.dimension`` is returned.
        """
        try:
            cached = self._cached_embedding(text)
        except self._EmbeddingUnavailable:
            return [0.0] * self.dimension
        # Hand out a copy so callers that mutate the result cannot corrupt
        # the cached entry.
        return list(cached)

    def _embed_or_raise(self, text: str) -> List[float]:
        """Return the embedding for ``text`` or raise ``_EmbeddingUnavailable``.

        This is the function wrapped by the LRU cache; raising (rather than
        returning a zero vector) keeps failures out of the cache.
        """
        embedding = self._request_embedding(text)
        if embedding is None:
            raise self._EmbeddingUnavailable(text[:50])
        return embedding

    def clear_cache(self):
        """Drop every entry from the embedding cache."""
        self._cached_embedding.cache_clear()
        logger.info("Кэш эмбеддингов очищен")

    def get_stats(self) -> Dict[str, Any]:
        """Return configuration values and cache counters as a dictionary."""
        cache_info = self._cached_embedding.cache_info()
        return {
            'model': self.embedding_model,
            'dimension': self.dimension,
            'timeout': self.timeout,
            'max_retries': self.max_retries,
            'cache_hits': cache_info.hits,
            'cache_misses': cache_info.misses,
            'cache_size': cache_info.currsize,
            'cache_maxsize': cache_info.maxsize,
        }
# Shared module-level instance, created at import time; its constructor
# reads the environment configuration.
embedding_service = EmbeddingService()