#!/usr/bin/env python3
"""
Test vectorization and report missing vectors
"""
import weaviate
from sentence_transformers import SentenceTransformer
from datetime import datetime
import sys

def _print_banner(title, leading_blank=True):
    """Print *title* framed by two 60-character rules.

    Args:
        title: Text printed between the rules.
        leading_blank: When true, a blank line is emitted before the first
            rule to separate this section from the previous output.
    """
    rule = "=" * 60
    print(("\n" + rule) if leading_blank else rule)
    print(title)
    print(rule)


def _run_search_checks(collection, model, queries):
    """Run a near-vector search for every query and print the top hits.

    Args:
        collection: Weaviate collection handle to query.
        model: Object whose ``encode(text)`` result supports ``.tolist()``
            (here a ``SentenceTransformer``).
        queries: Iterable of query strings.
    """
    for query in queries:
        print(f"\nQuery: '{query}'")
        query_vector = model.encode(query).tolist()

        # Only the search call itself is guarded, so that a formatting
        # problem while printing is not misreported as a search failure.
        try:
            results = collection.query.near_vector(
                near_vector=query_vector,
                limit=3,
                return_properties=["dba_name", "results", "city"],
            )
        except Exception as e:
            print(f"  ❌ Search failed: {e}")
            print("     This likely means vectors are missing!")
            continue

        if not results.objects:
            print("  ❌ No results found - vectors may be missing!")
            continue

        for i, obj in enumerate(results.objects, 1):
            props = obj.properties
            # ``or`` (rather than a .get default) also covers properties
            # that are present but null; slicing None would raise TypeError.
            name = str(props.get('dba_name') or 'Unknown')
            print(f"  {i}. {name[:50]}")
            print(f"     Result: {props.get('results') or 'N/A'}")
            print(f"     City: {props.get('city') or 'N/A'}")


def _report_vector_coverage(collection, total, sample_size=100):
    """Fetch a sample of objects and report how many carry a vector.

    Args:
        collection: Weaviate collection handle to sample from.
        total: Total number of records in the collection, used in the
            warning text.
        sample_size: Maximum number of objects to fetch for the check.
    """
    # The v4 client omits vectors from query results unless explicitly
    # requested; without include_vector=True every object looks unvectorized.
    sample = collection.query.fetch_objects(
        limit=sample_size,
        include_vector=True,
    )
    objects = sample.objects

    with_vectors = sum(1 for obj in objects if getattr(obj, 'vector', None))
    without_vectors = len(objects) - with_vectors

    # Report the real sample size; the collection may hold fewer records
    # than were requested.
    print(f"Sample of {len(objects)} records:")
    print(f"  With vectors: {with_vectors}")
    print(f"  Without vectors: {without_vectors}")

    if not objects:
        # An empty sample must not be reported as full coverage.
        print("\n⚠️  WARNING: No records returned - vector coverage could not be checked.")
    elif without_vectors > 0:
        print(f"\n⚠️  WARNING: Many records lack vectors!")
        print(f"   The import only vectorized 10 test records.")
        print(f"   Need to run full vectorization for {total-10:,} records.")
        print(f"\nTo fix, run: python vectorize_all.py")
    else:
        print(f"\n✅ All sampled records have vectors!")


def test_vectors():
    """Check the ``RawInspection`` collection in a local Weaviate instance.

    Connects to Weaviate on ``localhost:8080``, prints the collection's
    record count, runs a few sample semantic searches using the
    ``all-MiniLM-L6-v2`` sentence-transformer model, and samples up to 100
    objects to report how many have vectors. All results are printed to
    stdout; nothing is returned.

    Errors raised after the connection is established are printed together
    with a traceback instead of propagating, and the client is always
    closed. A failure to connect is not caught and propagates to the caller.
    """
    _print_banner("VECTOR DATABASE TEST", leading_blank=False)

    # Connect to Weaviate
    print("Connecting to Weaviate...")
    client = weaviate.connect_to_local(host="localhost", port=8080)

    try:
        # Check collections
        collections = client.collections.list_all()
        print(f"\nCollections found: {list(collections.keys())}")

        if "RawInspection" not in collections:
            print("❌ RawInspection collection not found!")
        else:
            collection = client.collections.get("RawInspection")

            # Get count
            response = collection.aggregate.over_all(total_count=True)
            total = response.total_count
            print(f"\nTotal records in RawInspection: {total:,}")

            _print_banner("TESTING SEMANTIC SEARCH")

            # Load model for search
            print("Loading Sentence Transformer model...")
            model = SentenceTransformer('all-MiniLM-L6-v2')

            test_queries = [
                "pizza restaurant rodent violation",
                "temperature control failure",
                "chinese restaurant health code",
                "failed inspection closure",
            ]
            _run_search_checks(collection, model, test_queries)

            _print_banner("CHECKING VECTOR COVERAGE")
            _report_vector_coverage(collection, total)

    except Exception as e:
        # Top-level boundary of a diagnostic script: report and keep going
        # so the client is closed and the final banner is still printed.
        print(f"❌ Error: {e}")
        import traceback
        traceback.print_exc()
    finally:
        client.close()

    _print_banner("TEST COMPLETE")

# Run the checks only when this file is executed as a script, not on import.
if __name__ == "__main__":
    test_vectors()