#!/usr/bin/env python3
"""
Improved Auto-Tagging System
Replaces overly complex tags with simple, user-friendly ones
"""

def generate_improved_tags(article_data):
    """Generate a short, deterministic list of user-friendly tags.

    Args:
        article_data: Mapping of article properties. The keys read are
            ``establishment_name``, ``violations``, ``results`` and
            ``establishment_type``; missing or ``None`` values are treated
            as empty text. Matching is case-insensitive.

    Returns:
        A list of at most 8 unique tag strings in a stable order:
        inspection result, establishment type, violation categories,
        chain names, and finally ``'Chicago'``, which is always present.
    """
    # Function-scope import, matching the local-import style used in this file.
    import re

    max_tags = 8

    def field_text(key):
        # Absent/None properties become '' instead of the literal 'none'.
        value = article_data.get(key)
        return '' if value is None else str(value).lower()

    establishment_name = field_text('establishment_name')
    violations = field_text('violations')
    results = field_text('results')
    establishment_type = field_text('establishment_type')

    auto_tags = []

    # CORE INSPECTION RESULT TAGS
    if 'fail' in results:
        auto_tags.append('Failed Inspection')
    elif 'pass' in results:
        if 'condition' in results:
            auto_tags.append('Conditional Pass')
        else:
            auto_tags.append('Passed Inspection')

    # ESTABLISHMENT TYPE TAGS (first match wins)
    if 'restaurant' in establishment_name or 'restaurant' in establishment_type:
        auto_tags.append('Restaurant')
    elif any(word in establishment_name for word in ('coffee', 'starbucks', 'dunkin')):
        auto_tags.append('Coffee Shop')
    elif re.search(r'\b(?:bars?|taverns?|pubs?)\b', establishment_name):
        # Whole-word match: a plain substring test would also tag
        # "barbecue", "barista" or "public" as a bar.
        auto_tags.append('Bar')
    elif 'pizza' in establishment_name:
        auto_tags.append('Pizza')
    elif 'grocery' in establishment_name or 'market' in establishment_name:
        auto_tags.append('Grocery Store')
    elif 'bakery' in establishment_name:
        auto_tags.append('Bakery')
    else:
        auto_tags.append('Food Service')

    # MAJOR VIOLATION TYPES (Only the important ones people care about)
    if any(word in violations for word in ('temperature', 'temp', 'tcs')):
        auto_tags.append('Temperature Violations')
    # Whole-word match: the substring 'rat' occurs inside "temperature",
    # "separate", "refrigeration", etc., which would flag rodents falsely.
    if re.search(r'\b(?:rodents?|rats?|mice|mouse)\b', violations):
        auto_tags.append('Rodent Problem')
    if 'roach' in violations:  # also covers 'cockroach'
        auto_tags.append('Pest Problem')
    if any(word in violations for word in ('clean', 'sanitary', 'dirty')):
        auto_tags.append('Cleanliness Issues')
    if any(word in violations for word in ('hand', 'wash', 'soap')):
        auto_tags.append('Handwashing Issues')

    # POPULAR CHAINS (Only major ones people search for)
    major_chains = {
        "McDonald's": ['mcdonald'],
        'Starbucks': ['starbucks'],
        'Subway': ['subway'],
        "Dunkin'": ['dunkin'],
        'Taco Bell': ['taco bell'],
        'Pizza Hut': ['pizza hut'],
        'KFC': ['kfc'],
        'Burger King': ['burger king'],
        'Chipotle': ['chipotle'],
        'Panera': ['panera'],
    }
    for chain_name, keywords in major_chains.items():
        if any(keyword in establishment_name for keyword in keywords):
            auto_tags.append(chain_name)

    # De-duplicate while keeping insertion order (a set would shuffle the
    # tags differently on every run), and reserve the final slot so the
    # always-present 'Chicago' tag can never be truncated away.
    unique_tags = list(dict.fromkeys(auto_tags))
    return unique_tags[:max_tags - 1] + ['Chicago']

def retag_existing_articles():
    """Regenerate and store ``auto_tags`` for every object in "Articles".

    Loads environment variables from ``/home/chris/.env``, connects to a
    local Weaviate instance, computes new tags for each article with
    ``generate_improved_tags`` and writes them to the ``auto_tags``
    property. A failure on one article is reported and skipped so the rest
    of the run continues. Progress and a final count are printed to stdout.
    The client connection is closed even if the run aborts.
    """
    import weaviate
    from dotenv import load_dotenv

    load_dotenv('/home/chris/.env')

    client = weaviate.connect_to_local()
    updated_count = 0

    try:
        articles = client.collections.get("Articles")

        # Iterate the whole collection; a single fetch_objects(limit=1000)
        # call would silently leave everything past the first 1000 untouched.
        for article_obj in articles.iterator():
            props = article_obj.properties

            try:
                # Generate new improved tags and store them on the article
                new_tags = generate_improved_tags(props)
                articles.data.update(
                    uuid=article_obj.uuid,
                    properties={'auto_tags': new_tags}
                )
            except Exception as e:
                # Per-article boundary: report and move on to the next one.
                print(f"❌ Error updating article: {e}")
                continue

            updated_count += 1
            # The title may be absent or stored as None; never slice None.
            title = str(props.get('title') or 'Unknown')
            print(f"✅ Updated tags for: {title[:50]}...")
            print(f"   New tags: {new_tags}")
    finally:
        # Release the connection even when iteration itself raises.
        client.close()

    print(f"\n🎉 Successfully retagged {updated_count} articles!")

# Script entry point: when executed directly (not imported), retag every
# stored article.
if __name__ == "__main__":
    retag_existing_articles()