#!/usr/bin/env python3
"""
Create Tag Pages for all existing articles
"""

import weaviate
import os
from datetime import datetime
from dotenv import load_dotenv

# Load environment variables from a fixed .env file as soon as the module is imported.
# NOTE(review): the absolute path ties this script to one user account/machine, and
# nothing visible in this file reads the environment afterwards — confirm whether
# the Weaviate client (or anything else) actually depends on these variables.
load_dotenv('/home/chris/.env')

def generate_slug(text):
    """Return a lowercase, hyphen-separated slug derived from ``text``.

    The text is lowercased, every character that is not a word character,
    whitespace or a hyphen is removed, each run of hyphens/whitespace is
    collapsed to a single hyphen, and leading/trailing hyphens are trimmed.
    """
    import re

    # Step 1: keep only word characters, whitespace and hyphens.
    kept = re.sub(r'[^\w\s-]', '', text.lower())
    # Step 2: squash separator runs into one hyphen, then trim the edges.
    return re.sub(r'[-\s]+', '-', kept).strip('-')

def create_tag_pages():
    """Create or refresh a ``TagPages`` object for every tag used by an article.

    Connects to the local Weaviate instance, counts how many objects in the
    ``Articles`` collection carry each ``auto_tags`` value, and then, per tag,
    either updates the matching ``TagPages`` object (``article_count`` and
    ``last_updated``) or inserts a new one. A failure on one tag is printed
    and does not stop processing of the remaining tags. Finally prints a
    created/updated/failed summary and the ten most frequent tags.

    Returns:
        None. All results are reported via ``print``.
    """
    from collections import Counter
    from datetime import timezone

    from weaviate.classes.query import Filter

    timestamp_format = '%Y-%m-%dT%H:%M:%S.%fZ'

    tag_counts = Counter()
    created_count = 0
    updated_count = 0
    failed_count = 0

    client = weaviate.connect_to_local()
    try:
        articles = client.collections.get("Articles")
        tag_pages = client.collections.get("TagPages")

        # Walk the whole collection via the client's iterator instead of a
        # single fetch with a fixed limit, which silently ignored any
        # articles beyond the first 1000.
        article_total = 0
        for article_obj in articles.iterator():
            article_total += 1
            # `or []` also covers the property being present but None.
            auto_tags = article_obj.properties.get('auto_tags') or []
            for tag in auto_tags:
                # Skip empty/whitespace-only entries and non-string values.
                if isinstance(tag, str) and tag.strip():
                    tag_counts[tag] += 1

        print(f"📊 Found {len(tag_counts)} unique tags from {article_total} articles")

        for tag, count in tag_counts.items():
            try:
                # v4 collections filter through `filters=`; the v3-style
                # `query.where({...})` call is not available on them.
                # NOTE(review): exact matching assumes `tag_name` is tokenized
                # as a whole field — confirm against the TagPages schema.
                existing = tag_pages.query.fetch_objects(
                    filters=Filter.by_property("tag_name").equal(tag),
                    limit=1,
                )

                # The trailing 'Z' declares UTC, so the clock must be read in UTC.
                now = datetime.now(timezone.utc).strftime(timestamp_format)

                if existing.objects:
                    tag_pages.data.update(
                        uuid=existing.objects[0].uuid,
                        properties={
                            'article_count': count,
                            'last_updated': now,
                        },
                    )
                    updated_count += 1
                    print(f"✅ Updated tag page: {tag} ({count} articles)")
                else:
                    tag_pages.data.insert({
                        'tag_name': tag,
                        'slug': generate_slug(f"tag-{tag}"),
                        'title': f"{tag} - Chicago Restaurant Health Inspections",
                        'description': f"All Chicago restaurant health inspection articles tagged with '{tag}'. See the latest violations, closures, and passes for {tag.lower()} establishments.",
                        'article_count': count,
                        'created_date': now,
                        'last_updated': now,
                    })
                    created_count += 1
                    print(f"✅ Created tag page: {tag} ({count} articles)")

            except Exception as e:
                # Best effort: report the failure and carry on with the next tag.
                failed_count += 1
                print(f"❌ Error creating tag page for '{tag}': {e}")
    finally:
        # Release the connection even if counting or a query raised.
        client.close()

    print(
        f"\n🎉 Created {created_count} and updated {updated_count} tag pages "
        f"({failed_count} failed, {len(tag_counts)} tags total)"
    )
    print("📈 Tag distribution:")

    # Show most popular tags
    for tag, count in tag_counts.most_common(10):
        print(f"   {tag}: {count} articles")

# Build the tag pages only when this file is executed as a script, not on import.
if __name__ == "__main__":
    create_tag_pages()