#!/usr/bin/env python3
"""
Regenerate tag pages from existing articles
"""

import os
import re
from collections import Counter
from datetime import datetime, timezone

import weaviate
from dotenv import load_dotenv

# Load environment variables from a fixed .env file as soon as this module is
# imported or run.
# NOTE(review): the absolute path is hard-coded to one user's home directory,
# so the script is tied to that machine layout. Nothing in this file reads the
# loaded variables directly; presumably a library picks them up -- confirm.
load_dotenv('/home/chris/.env')

def _slugify_tag(tag):
    """Return the URL slug for *tag*.

    The tag is lower-cased, every character other than ASCII letters, digits,
    whitespace and hyphens is dropped, and each run of whitespace becomes a
    single hyphen. The result may be an empty string when the tag contains
    none of the kept characters.
    """
    slug = re.sub(r'[^a-zA-Z0-9\s-]', '', tag.lower())
    return re.sub(r'\s+', '-', slug.strip())


def _count_tags(article_objects):
    """Count how many articles carry each tag.

    Args:
        article_objects: Iterable of objects exposing a ``properties``
            mapping; the ``auto_tags`` entry is read from it.

    Returns:
        A ``(article_total, tag_counts)`` tuple, where ``tag_counts`` is a
        ``Counter`` keyed by the whitespace-stripped tag text.
    """
    article_total = 0
    tag_counts = Counter()

    for article_obj in article_objects:
        article_total += 1
        # ``auto_tags`` may be missing or None; treat both as "no tags".
        auto_tags = article_obj.properties.get('auto_tags') or []
        # Ignore blank entries and anything that is not a string, instead of
        # failing on ``.strip()``.
        tag_counts.update(
            tag.strip()
            for tag in auto_tags
            if isinstance(tag, str) and tag.strip()
        )

    return article_total, tag_counts


def _utc_timestamp():
    """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SS.mmmZ``."""
    now = datetime.now(timezone.utc)
    return now.isoformat(timespec='milliseconds').replace('+00:00', 'Z')


def regenerate_tag_pages():
    """Regenerate all tag pages from existing articles.

    Reads every object in the "Articles" collection, counts articles per tag,
    deletes the objects currently in "TagPages", and inserts one tag page per
    unique tag. Progress is reported on stdout.

    Deletion and per-tag insertion are best-effort: a failure is printed and
    the run continues. The Weaviate client is closed even if an unexpected
    error escapes.

    Returns:
        The number of tag pages that were created successfully.
    """
    client = weaviate.connect_to_local()
    try:
        articles = client.collections.get("Articles")
        tag_pages = client.collections.get("TagPages")

        # Walk the whole collection with the cursor iterator so that nothing
        # beyond a fixed page size is silently ignored.
        article_total, tag_counts = _count_tags(articles.iterator())

        print(f"📊 Found {article_total} articles")
        print(f"📊 Found {len(tag_counts)} unique tags")

        # Delete existing tag pages. The objects are materialised first so
        # the collection is not modified while it is being iterated.
        try:
            existing_tags = list(tag_pages.iterator())
            for tag_obj in existing_tags:
                tag_pages.data.delete_by_id(tag_obj.uuid)
                print(f"🗑️  Deleted existing tag: {tag_obj.properties.get('tag_name')}")
        except Exception as e:
            # Best-effort: report the failure and still try to create pages.
            print(f"⚠️  Could not delete existing tag pages: {e}")

        # One timestamp for the whole run, so every page agrees.
        timestamp = _utc_timestamp()
        created_count = 0

        for tag, count in tag_counts.items():
            slug = _slugify_tag(tag)
            if not slug:
                # The tag had no slug-safe characters; a page with an empty
                # slug could not be addressed, so skip it.
                print(f"⚠️  Skipping tag '{tag}': empty slug")
                continue

            tag_page_data = {
                'tag_name': tag,
                'slug': slug,
                'title': f"{tag} - Chicago Restaurant Health Inspections",
                'description': f"All Chicago restaurant health inspection articles tagged with '{tag}'. See the latest violations, closures, and passes.",
                'article_count': count,
                'created_date': timestamp,
                'last_updated': timestamp,
            }

            try:
                tag_pages.data.insert(tag_page_data)
            except Exception as e:
                print(f"❌ Error creating tag page for '{tag}': {e}")
                continue

            print(f"✅ Created tag page: {tag} ({count} articles)")
            created_count += 1
    finally:
        # Always release the connection, including on unexpected errors.
        client.close()

    print("\n🎉 Tag page regeneration complete!")
    print(f"📊 Created {created_count} tag pages")

    return created_count

# Run the regeneration only when this file is executed as a script, not when
# it is imported. The returned count is not used here.
if __name__ == "__main__":
    regenerate_tag_pages()