#!/usr/bin/env python3
"""
Generate more articles from different Chicago inspections
"""

import sys
import os
sys.path.insert(0, '/var/www/twin-digital-media/public_html/_sites/cleankitchens/production/scripts')

from test_article_generator import TestArticleGenerator
import weaviate

def get_more_inspections():
    """Return failed inspections that do not have an article yet.

    Connects to the local Weaviate instance, collects the ``inspection_id``
    of every object in the ``Articles`` collection, then pages through
    ``RawInspection`` objects whose ``results`` property equals ``"Fail"``
    and keeps the ones whose ``inspection_id`` is not already covered.

    Returns:
        A list of at most five property dicts, each requested with the keys
        ``inspection_id``, ``dba_name``, ``city``, ``results`` and
        ``raw_data``. The list is empty when nothing new was found.

    The Weaviate connection is always closed before returning, including
    when a query raises.
    """
    # Local import keeps this block self-contained; ``Filter`` is the v4
    # client's filter builder (the v3-style ``where={...}`` dict is not
    # accepted by ``fetch_objects``).
    from weaviate.classes.query import Filter

    wanted = 5        # number of new inspections to hand back
    page_size = 20    # failed inspections fetched per query
    max_scanned = 5000  # upper bound so offset pagination cannot run away

    client = weaviate.connect_to_local(host="localhost", port=8080)

    try:
        # Walk the whole Articles collection instead of a fixed-size page so
        # that no existing article is missed once the collection grows.
        articles = client.collections.get("Articles")
        existing_ids = {
            obj.properties.get('inspection_id')
            for obj in articles.iterator(return_properties=["inspection_id"])
        }

        print(f"Found {len(existing_ids)} existing articles")

        inspections = client.collections.get("RawInspection")
        failed_only = Filter.by_property("results").equal("Fail")

        new_inspections = []
        offset = 0
        while len(new_inspections) < wanted and offset < max_scanned:
            page = inspections.query.fetch_objects(
                filters=failed_only,
                return_properties=["inspection_id", "dba_name", "city", "results", "raw_data"],
                limit=page_size,
                offset=offset,
            )
            if not page.objects:
                # Ran out of failed inspections.
                break

            for obj in page.objects:
                inspection_id = obj.properties.get('inspection_id')
                if inspection_id and inspection_id not in existing_ids:
                    new_inspections.append(obj.properties)
                    # Remember the id so the same inspection is not picked
                    # twice within this run.
                    existing_ids.add(inspection_id)
                    if len(new_inspections) >= wanted:
                        break

            offset += len(page.objects)

        return new_inspections

    finally:
        client.close()

def main():
    """Generate an article for each inspection that lacks one.

    Fetches candidate inspections via ``get_more_inspections()``, lists them,
    and passes each one to ``TestArticleGenerator.process_inspection`` after
    reshaping it into the dict layout built below. Returns early, without
    creating a generator, when there is nothing to process.

    The generator's ``weaviate_client`` is closed even if processing an
    inspection raises; the exception itself still propagates.
    """
    # Get inspections without articles
    inspections = get_more_inspections()

    if not inspections:
        print("No new inspections found to process")
        return

    print(f"\nFound {len(inspections)} inspections without articles:")
    for insp in inspections:
        print(f"  - {insp['dba_name']} (ID: {insp['inspection_id']})")

    # Generate articles
    generator = TestArticleGenerator()

    try:
        for inspection in inspections:
            print(f"\n{'='*60}")
            print(f"Processing: {inspection['dba_name']}")
            print(f"Inspection ID: {inspection['inspection_id']}")

            raw_data = inspection['raw_data']

            # Format for the generator: address, violations, facility type
            # and zip code are derived from raw_data by the generator's own
            # extract helpers.
            formatted_inspection = {
                'inspection_id': inspection['inspection_id'],
                'facility_name': inspection['dba_name'],
                'city': inspection['city'],
                'results': inspection['results'],
                'raw_data': raw_data,
                'address': generator.extract_field_from_raw(raw_data, 'Address'),
                'violations': generator.extract_violations_from_raw(raw_data),
                'facility_type': generator.extract_field_from_raw(raw_data, 'Facility Type'),
                'zip_code': generator.extract_field_from_raw(raw_data, 'Zip')
            }

            # Generate article
            generator.process_inspection(formatted_inspection)

        print("\n✅ Article generation complete!")
    finally:
        # Clean up: release the generator's connection on success and on
        # failure alike.
        generator.weaviate_client.close()

# Run the batch only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()