#!/usr/bin/env python3
"""
Test processing with just the 10 most recent records (dated 2023 or later) to debug the issue
"""

import os
import sys
import pandas as pd
import logging
from pathlib import Path
from datetime import datetime

# Set up logging: INFO level and above, formatted as "<time> - <level> - <message>"
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Add the scripts directory to the module search path. This has to run before
# the import below, which presumably resolves from that directory -- confirm
# if the module is ever installed as a package instead.
sys.path.append('/var/www/twin-digital-media/public_html/_sites/cleankitchens/production/scripts')

from comprehensive_processor import CleanKitchensProcessor

def test_recent_10(
    csv_file: str = (
        "/var/www/twin-digital-media/public_html/_sites/cleankitchens/"
        "production/data/chicago_food_inspections_latest.csv"
    ),
) -> None:
    """Run the bulk processor against the 10 most recent inspection records.

    Reads ``csv_file``, keeps rows whose ``inspection_date`` parses and falls
    in 2023 or later, writes the 10 newest to a private temporary CSV and
    passes that file to ``CleanKitchensProcessor.process_bulk_upload``. It
    then logs the processor's counters and a sample of stored articles.

    A missing file, missing columns or an empty selection end the run early
    with a log message. Any exception raised while writing the sample or
    running the processor is logged with its traceback instead of being
    propagated. Errors from reading ``csv_file`` itself still propagate.
    The temporary CSV is removed in every case.

    Args:
        csv_file: Path of the inspections CSV to sample. Defaults to the
            file under the production data directory.
    """
    # Local import keeps this block self-contained; the file already imports
    # helper modules at function scope.
    import tempfile

    if not Path(csv_file).exists():
        logging.error(f"❌ File not found: {csv_file}")
        return

    logging.info("📖 Reading CSV and getting 10 most recent records...")

    # Read CSV
    df = pd.read_csv(csv_file, low_memory=False)
    logging.info(f"📊 Total records in dataset: {len(df):,}")

    # Fail with a clear message instead of a bare KeyError further down.
    required_columns = ('inspection_date', 'dba_name', 'results')
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        logging.error(f"❌ CSV is missing required columns: {missing_columns}")
        return

    # Convert inspection date to datetime; unparseable values become NaT
    df['inspection_date'] = pd.to_datetime(df['inspection_date'], errors='coerce')

    # Keep rows with a valid date in 2023 or later, newest first, top 10
    dated = df.dropna(subset=['inspection_date'])
    recent = dated[dated['inspection_date'].dt.year >= 2023]
    df_test = recent.sort_values('inspection_date', ascending=False).head(10)

    if df_test.empty:
        # Nothing matched the filter; do not hand an empty file to the processor.
        logging.warning("⚠️ No records dated 2023 or later found; nothing to process")
        return

    # Report the real count: fewer than 10 rows may have matched.
    logging.info(f"📊 Testing with {len(df_test)} most recent records:")
    for _, row in df_test.iterrows():
        logging.info(f"  {row['inspection_date']} - {row['dba_name']} - {row['results']}")

    # mkstemp creates a uniquely named file readable only by this user, which
    # avoids clobbering (or following a symlink at) a fixed name in /tmp.
    fd, temp_file = tempfile.mkstemp(prefix='test_10_recent_', suffix='.csv')
    os.close(fd)  # pandas reopens the file by path

    try:
        # Save test data
        df_test.to_csv(temp_file, index=False)
        logging.info(f"💾 Saved test data to: {temp_file}")

        # Process with our system. The processor is constructed inside the
        # try block so the temp file is cleaned up even if construction fails.
        logging.info("🔄 Starting test processing...")
        processor = CleanKitchensProcessor()
        processor.process_bulk_upload(temp_file, batch_size=10)

        logging.info("✅ Test processing complete!")
        logging.info(f"Articles generated: {processor.articles_generated}")
        logging.info(f"Total cost: ${processor.total_cost:.4f}")

        # Show what was created.
        # NOTE(review): the fetch is capped at limit=20, so the "total" logged
        # below is presumably at most 20 rather than a true database count,
        # and no sort is requested, so the first five objects are not
        # guaranteed to be the newest -- confirm against the client API.
        articles = processor.articles.query.fetch_objects(limit=20)
        logging.info(f"📋 Total articles in database: {len(articles.objects)}")

        for i, obj in enumerate(articles.objects[:5]):
            props = obj.properties
            # `or` also covers properties stored as None/empty, which would
            # otherwise break the slicing below and be misreported as a
            # processing error by the except clause.
            title = str(props.get('title') or 'Unknown')
            published = props.get('published_date') or 'Unknown'
            tags = props.get('auto_tags') or []

            if hasattr(published, 'strftime'):
                published = published.strftime('%Y-%m-%d')
            elif isinstance(published, str):
                published = published[:10]  # keep the YYYY-MM-DD prefix

            logging.info(f"  {i+1}. {title[:60]}... - {published}")
            logging.info(f"     Tags: {tags[:10]}...")  # First 10 tags

    except Exception as e:
        # Top-level boundary of a debugging script: log message plus traceback.
        logging.exception(f"❌ Error during test: {e}")

    finally:
        Path(temp_file).unlink(missing_ok=True)

# Run the test only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    test_recent_10()