import os
import logging
import chromadb
import requests
from chromadb.config import Settings
from typing import List, Dict, Any, Optional
from embedding_service import embedding_service
# Module-level logger, named after this module so handlers/levels can be set per module.
logger = logging.getLogger(__name__)
class OllamaEmbeddingFunction:
    """Callable adapter that turns texts into vectors via ``embedding_service``.

    Calling an instance with a list of texts returns one embedding per text.
    If the embedding service raises, the error is logged and an all-zero
    vector of length ``self.dimension`` is returned for every input text
    instead of propagating the exception.
    """

    def __init__(self):
        # Identifier exposed on the instance.
        self.name = "ollama-embedding-function"
        # Vector length reported by the shared embedding service; it is also
        # the length of the zero vectors produced on failure.
        self.dimension = embedding_service.dimension

    def __call__(self, input_texts: List[str]) -> List[List[float]]:
        """Embed ``input_texts`` in a single batch call.

        Args:
            input_texts: Texts to embed.

        Returns:
            The result of ``embedding_service.create_embeddings_batch``; on
            any exception, one zero vector of length ``self.dimension`` per
            input text.
        """
        try:
            vectors = embedding_service.create_embeddings_batch(input_texts)
        except Exception as exc:
            logger.error(f"Ошибка создания эмбеддингов: {exc}")
            # Best-effort fallback: keep the output aligned with the input.
            vectors = [[0.0] * self.dimension for _ in input_texts]
        return vectors
class ChromaDBClient:

    def __init__(self, host: Optional[str] = None, port: Optional[int] = None):
        """Resolve connection settings and connect to ChromaDB immediately.

        Args:
            host: ChromaDB server host. When omitted or empty, the
                ``CHROMADB_HOST`` environment variable is used (default ``''``).
            port: ChromaDB server port. When omitted (or ``0``), the
                ``CHROMADB_PORT`` environment variable is parsed as an
                integer; if that is unset or empty as well, the port stays
                ``None``.

        Raises:
            ValueError: If the port falls back to ``CHROMADB_PORT`` and that
                value is not a valid integer.
            Exception: Whatever ``connect()`` re-raises when no client could
                be created.
        """
        self.host = host or os.getenv('CHROMADB_HOST', '')
        port_env = os.getenv('CHROMADB_PORT')
        self.port = port or (int(port_env) if port_env else None)
        # Set by connect(); None until a client has been created.
        self.client = None
        self.collection_name = os.getenv('CHROMADB_COLLECTION_NAME', 'shared_documents_cosine')
        # The constructor connects eagerly, so building an instance performs I/O.
        self.connect()
    def connect(self):
        """Create ``self.client``, preferring HTTP and falling back to local storage.

        An ``HttpClient`` for ``self.host``/``self.port`` is tried first and
        probed with ``list_collections()``. If creating or probing it fails,
        a ``PersistentClient`` on ``/opt/autogen/chroma_db`` is created and
        probed instead.

        Raises:
            Exception: Re-raised (after logging) when the persistent fallback
                cannot be created or probed either.
        """

        def _new_settings():
            # Each client attempt gets its own Settings instance: the chromadb
            # client factories may write connection details into the object
            # they receive, so a Settings used for a failed HTTP attempt must
            # not be handed to the persistent fallback.
            return Settings(
                anonymized_telemetry=False,
                allow_reset=True
            )

        try:
            os.environ["ANONYMIZED_TELEMETRY"] = "False"
            try:
                http_client = chromadb.HttpClient(
                    host=self.host,
                    port=self.port,
                    settings=_new_settings()
                )
                # Probe inside the HTTP attempt so that a client which was
                # constructed without contacting the server still triggers
                # the fallback when the server is unreachable.
                http_client.list_collections()
                self.client = http_client
                logger.info(f" Подключен к ChromaDB HTTP: {self.host}:{self.port}")
            except Exception as e:
                logger.warning(f"HttpClient не работает, пробуем PersistentClient: {e}")
                persist_directory = "/opt/autogen/chroma_db"
                self.client = chromadb.PersistentClient(
                    path=persist_directory,
                    settings=_new_settings()
                )
                logger.info(f" Подключен к ChromaDB Persistent: {persist_directory}")
                self.client.list_collections()
            logger.info(" Подключение к ChromaDB успешно протестировано")
        except Exception as e:
            logger.error(f" Ошибка подключения к ChromaDB: {e}")
            # Bare raise keeps the original traceback intact.
            raise
    def get_collection(self, use_ollama_embeddings: bool = True):
        """Return the configured collection, creating it when it is missing.

        A newly created collection gets the metadata
        ``{"description": "RAG documents collection", "hnsw:space": "cosine"}``.
        An existing collection is returned unchanged; its ``hnsw:space``
        metadata value (``"l2"`` when absent) is only logged.

        Args:
            use_ollama_embeddings: Not used by this method; kept so existing
                callers that pass it keep working.

        Returns:
            The collection object returned by the ChromaDB client.

        Raises:
            Exception: Any error from the client is logged and re-raised.
        """
        try:
            # Depending on the chromadb release, list_collections() yields
            # either collection objects or plain name strings; accept both.
            collection_names = [
                entry if isinstance(entry, str) else entry.name
                for entry in self.client.list_collections()
            ]
            if self.collection_name not in collection_names:
                logger.info(f"Коллекция {self.collection_name} не найдена, создаем...")
                collection = self.client.create_collection(
                    name=self.collection_name,
                    metadata={
                        "description": "RAG documents collection",
                        "hnsw:space": "cosine"
                    }
                )
                logger.info(f" Коллекция {self.collection_name} создана с cosine distance")
            else:
                collection = self.client.get_collection(self.collection_name)
                metadata = collection.metadata or {}
                distance_metric = metadata.get("hnsw:space", "l2")
                logger.info(f" Коллекция {self.collection_name} найдена, метрика: {distance_metric}")
            return collection
        except Exception as e:
            logger.error(f" Ошибка работы с коллекцией: {e}")
            # Bare raise keeps the original traceback intact.
            raise
    def add_documents(self, documents: List[str], metadatas: List[Dict], ids: List[str]):
        """Embed ``documents`` and store them in the configured collection.

        Embeddings are computed with ``embedding_service`` and passed to the
        collection explicitly together with the texts, metadata and ids.

        Args:
            documents: Document texts to store.
            metadatas: Metadata dicts passed alongside ``documents``.
            ids: Record ids passed alongside ``documents``.

        Returns:
            ``True`` when the documents were added, ``False`` when any step
            raised (the error is logged, not propagated).
        """
        succeeded = False
        try:
            target = self.get_collection()
            vectors = embedding_service.create_embeddings_batch(documents)
            target.add(
                ids=ids,
                embeddings=vectors,
                metadatas=metadatas,
                documents=documents
            )
            logger.info(f" Добавлено {len(documents)} документов в коллекцию {self.collection_name}")
            succeeded = True
        except Exception as exc:
            logger.error(f" Ошибка добавления документов: {exc}")
        return succeeded
    def search_documents(self, query_texts: List[str], n_results: int = 5) -> List[Dict]:
        """Run a similarity search and return the hits of the first query.

        All ``query_texts`` are embedded and sent to the collection, but only
        the result list belonging to the first query text is converted and
        returned.

        Args:
            query_texts: Query strings to embed.
            n_results: Maximum number of hits requested per query.

        Returns:
            A list of dicts with the keys ``'content'`` (document text),
            ``'metadata'`` (always a dict; ``{}`` when the record has none)
            and ``'distance'`` (``0.0`` when the result carries no
            distances). An empty list is returned when nothing matched or
            when any step raised (the error is logged, not propagated).
        """
        try:
            collection = self.get_collection()
            query_embeddings = embedding_service.create_embeddings_batch(query_texts)
            results = collection.query(
                query_embeddings=query_embeddings,
                n_results=n_results
            )
            documents = []
            if results['documents'] and results['documents'][0]:
                # The presence checks do not depend on the hit index, so they
                # are evaluated once instead of once per hit.
                metadatas = (
                    results['metadatas'][0]
                    if results['metadatas'] and results['metadatas'][0]
                    else None
                )
                distances = (
                    results['distances'][0]
                    if results['distances'] and results['distances'][0]
                    else None
                )
                for i, doc in enumerate(results['documents'][0]):
                    if logger.isEnabledFor(logging.DEBUG):
                        logger.debug(f"DEBUG: Doc {i} length: {len(doc)} chars, preview: {doc[:50]}...")
                    documents.append({
                        'content': doc,
                        # An individual record may have no metadata (None);
                        # normalise it so callers always receive a dict.
                        'metadata': (metadatas[i] or {}) if metadatas else {},
                        'distance': distances[i] if distances else 0.0
                    })
            logger.info(f" Найдено {len(documents)} документов")
            return documents
        except Exception as e:
            logger.error(f" Ошибка поиска документов: {e}")
            return []
    def get_collection_info(self) -> Dict[str, Any]:
        """Summarise the configured collection.

        Returns:
            A dict with ``'name'``, ``'count'`` (value of the collection's
            ``count()``), ``'host'`` and ``'port'``. An empty dict is
            returned when anything raises (the error is logged, not
            propagated).
        """
        try:
            total = self.get_collection().count()
            info = dict(
                name=self.collection_name,
                count=total,
                host=self.host,
                port=self.port,
            )
        except Exception as exc:
            logger.error(f" Ошибка получения информации о коллекции: {exc}")
            info = {}
        return info
    def delete_documents_by_metadata(self, where: Dict[str, Any]) -> int:
        """Delete every record whose metadata matches ``where``.

        The matching ids are looked up first and then deleted by id, so the
        number of removed records can be reported.

        Args:
            where: Metadata filter passed to the collection's ``get``.

        Returns:
            The number of ids that were deleted; ``0`` when nothing matched
            or when any step raised (the error is logged, not propagated).
        """
        try:
            collection = self.get_collection()
            # Only the ids are needed, so ask the collection not to return
            # documents or metadata for the matching records.
            matching_ids = collection.get(where=where, include=[])['ids']
            if not matching_ids:
                logger.info(f"Документы с условием {where} не найдены")
                return 0
            collection.delete(ids=matching_ids)
            deleted_count = len(matching_ids)
            logger.info(f"Удалено {deleted_count} документов с условием {where}")
            return deleted_count
        except Exception as e:
            logger.error(f"Ошибка удаления документов: {e}")
            return 0
    def get_crawler_documents_count(self) -> Dict[str, int]:
        """Count stored records per crawler ``source`` metadata value.

        The sources counted are ``'website_crawl'``, ``'single_url_parse'``
        and ``'multiple_urls_parse'``.

        Returns:
            A dict mapping each source to its record count plus a
            ``'total'`` key holding their sum. A source whose lookup fails is
            counted as ``0`` (a warning is logged). If the collection itself
            cannot be obtained, ``{'total': 0}`` is returned.
        """
        try:
            collection = self.get_collection()
            counts = {}
            crawler_sources = ('website_crawl', 'single_url_parse', 'multiple_urls_parse')
            for source in crawler_sources:
                try:
                    # Only ids are counted, so skip documents and metadata
                    # in the response.
                    ids = collection.get(where={'source': source}, include=[])['ids']
                    counts[source] = len(ids) if ids else 0
                except Exception as e:
                    logger.warning(f"Ошибка подсчета для {source}: {e}")
                    counts[source] = 0
            counts['total'] = sum(counts.values())
            return counts
        except Exception as e:
            logger.error(f"Ошибка подсчета документов краулера: {e}")
            return {'total': 0}
# Shared module-level client. ChromaDBClient.__init__ calls connect(), so
# importing this module immediately attempts a ChromaDB connection and
# re-raises if no client can be created.
chroma_client = ChromaDBClient()