#!/usr/bin/env python3
"""
Smoke-test the CleanKitchens processor with the 5 oldest records from the Chicago inspection data.
"""

import sys
import pandas as pd
from pathlib import Path

# Add the scripts directory to path
sys.path.append('/var/www/twin-digital-media/public_html/_sites/cleankitchens/production/scripts')

from comprehensive_processor import CleanKitchensProcessor

def _published_day(value):
    """Return the first 10 characters of a published-date value.

    The value is converted with ``str()`` before slicing so that
    non-string values (for example ``datetime`` objects) do not raise
    ``TypeError``. ``None`` is reported as ``'Unknown'``.
    """
    if value is None:
        return 'Unknown'
    return str(value)[:10]


def test_oldest_5_records():
    """Run the processor against the 5 oldest inspection records.

    Steps:
      1. Read the tab-separated Chicago CSV and parse 'Inspection Date'.
      2. Keep the 5 rows with the earliest parseable inspection date.
      3. Write them to a temporary CSV and pass it to
         ``CleanKitchensProcessor.process_bulk_upload``.
      4. Print the processor's counters and up to 10 stored articles.
      5. Always remove the temporary CSV.

    Returns:
        None. Progress and errors are reported on stdout; failures during
        processing are caught, reported (with traceback) and not re-raised.
    """
    import traceback  # local import: only needed for failure diagnostics

    # Use the Chicago CSV file we found
    csv_file = "/var/www/twin-digital-media/public_html/_sites/cleankitchens/ck-aug-backup/files/il/chicago/chicago.csv"
    date_column = 'Inspection Date'

    if not Path(csv_file).exists():
        print(f"❌ File not found: {csv_file}")
        return

    print("📖 Reading CSV to find oldest records...")

    # Read CSV (tab-separated); malformed lines are skipped rather than fatal
    df = pd.read_csv(csv_file, sep='\t', on_bad_lines='skip', low_memory=False)

    # Without the date column there is nothing to sort on; report it
    # explicitly instead of failing with a bare KeyError.
    if date_column not in df.columns:
        print(f"❌ Column '{date_column}' not found in: {csv_file}")
        return

    # Convert inspection date to datetime for sorting; unparseable -> NaT
    df[date_column] = pd.to_datetime(df[date_column], errors='coerce')

    # Drop rows without a usable date, sort ascending and keep the oldest 5
    oldest_5 = df.dropna(subset=[date_column]).sort_values(date_column).head(5)

    # Do not hand an empty file to the processor
    if oldest_5.empty:
        print("❌ No records with a parseable inspection date found")
        return

    print("\n📊 Found oldest 5 records:")
    for _, row in oldest_5.iterrows():
        print(f"  {row[date_column].strftime('%Y-%m-%d')} - {row.get('DBA Name', 'Unknown')} - {row.get('Results', 'Unknown')}")

    temp_file = '/tmp/oldest_5_inspections.csv'

    try:
        # Writing the file and building the processor happen inside the
        # try block so the finally clause cleans up even if either fails.
        oldest_5.to_csv(temp_file, index=False)
        print(f"\n💾 Saved oldest 5 to: {temp_file}")

        # Process with our system
        print("\n🔄 Starting processor test...")
        processor = CleanKitchensProcessor()
        processor.process_bulk_upload(temp_file, batch_size=5)

        print("\n✅ Test complete!")
        print(f"Articles generated: {processor.articles_generated}")
        print(f"Estimated cost: ${processor.total_cost:.4f}")

        # Show what was created
        print("\n📋 Generated content:")

        # Query articles from Weaviate to see what was created
        articles = processor.articles.query.fetch_objects(limit=10)

        for obj in articles.objects:
            props = obj.properties
            article_type = props.get('article_type', 'individual')
            title = props.get('title', 'Unknown')
            # NOTE(review): the stored value may be a datetime rather than
            # a string depending on the collection schema - confirm. The
            # helper handles both.
            published = _published_day(props.get('published_date', 'Unknown'))

            if article_type == 'group_pass':
                restaurant_count = props.get('restaurant_count', 0)
                print(f"  📰 GROUP PASS: {title} ({restaurant_count} restaurants) - {published}")
            else:
                establishment = props.get('establishment_name', 'Unknown')
                print(f"  📰 INDIVIDUAL: {title} ({establishment}) - {published}")

    except Exception as e:
        # Top-level boundary of a diagnostic script: report and continue to
        # cleanup, but keep the traceback so the failure can be located.
        print(f"❌ Error during test: {e}")
        traceback.print_exc()

    finally:
        # Clean up
        print("\n🧹 Cleaning up temp file...")
        Path(temp_file).unlink(missing_ok=True)

# Script entry point: run the smoke test only when this file is executed
# directly, not when it is imported.
if __name__ == '__main__':
    test_oldest_5_records()