#!/usr/bin/env python3
"""
Update article slugs to: /yyyy/mm/dd/complete-title-not-truncated
"""

import re
import unicodedata
from datetime import datetime

import weaviate

def generate_full_title_slug(title, inspection_date=None):
    """
    Build a slug of the form ``yyyy/mm/dd/complete-title-not-truncated``.

    The returned slug has no leading slash; callers prepend one when
    building a URL.

    Args:
        title: Article title. ``None`` is treated as an empty title.
            Accented characters are transliterated to their ASCII base
            letter (``"Café"`` -> ``"cafe"``); any other character that
            is not ``a-z``, ``0-9``, whitespace or a hyphen is dropped.
        inspection_date: Either a string (``YYYY-MM-DD`` or an ISO-8601
            timestamp containing ``T``, optionally ending in ``Z``) or
            an object with a ``strftime`` method (``date``/``datetime``).
            Missing, unparseable or unsupported values fall back to the
            current local date.

    Returns:
        The slug string ``"<yyyy>/<mm>/<dd>/<title-slug>"``.
    """
    # Resolve the date; any value we cannot interpret falls back to "now"
    # so that one bad record does not abort a whole batch run.
    dt = None
    if isinstance(inspection_date, str):
        date_text = inspection_date.strip()
        try:
            if 'T' in date_text:
                # fromisoformat() only accepts a trailing 'Z' on newer
                # Python versions, so rewrite it as an explicit offset.
                dt = datetime.fromisoformat(date_text.replace('Z', '+00:00'))
            else:
                dt = datetime.strptime(date_text, '%Y-%m-%d')
        except ValueError:
            dt = None
    elif hasattr(inspection_date, 'strftime'):
        dt = inspection_date
    if dt is None:
        dt = datetime.now()

    # Format date part
    date_part = dt.strftime('%Y/%m/%d')

    # Clean title - keep full title, no truncation
    raw_title = '' if title is None else str(title)
    # Decompose accented characters and drop the combining marks so the
    # base letter survives the ASCII-only filter below.
    decomposed = unicodedata.normalize('NFKD', raw_title)
    title_slug = decomposed.encode('ascii', 'ignore').decode('ascii').lower()
    # Remove special characters but keep spaces and hyphens
    title_slug = re.sub(r'[^a-z0-9\s-]', '', title_slug)
    # Replace each run of whitespace with a single hyphen
    title_slug = re.sub(r'\s+', '-', title_slug)
    # Collapse repeated hyphens
    title_slug = re.sub(r'-+', '-', title_slug)
    # Strip leading/trailing hyphens
    title_slug = title_slug.strip('-')

    # Final slug - complete title, not truncated
    return f"{date_part}/{title_slug}"

def main():
    """
    Regenerate the ``slug`` of every object in the "Articles" collection.

    Connects to a local Weaviate instance, walks the whole collection,
    writes a new date-plus-full-title slug to each object, and backfills
    ``city``, ``state`` and ``establishment_name`` when they are empty.
    Afterwards it prints the URLs of up to three articles as a spot check.
    The client connection is closed even if an update fails part-way.
    """
    # Connect to Weaviate
    client = weaviate.connect_to_local()
    try:
        articles = client.collections.get("Articles")

        print("=" * 60)
        print("UPDATING SLUGS TO FULL TITLE FORMAT")
        print("Format: /yyyy/mm/dd/complete-title-not-truncated")
        print("=" * 60)

        # Get all articles. The collection iterator pages through the
        # entire collection, whereas a single fetch_objects(limit=100)
        # would silently skip everything after the first 100 objects.
        # Materialised up front so the total can be reported first.
        all_articles = list(articles.iterator())

        print(f"\nFound {len(all_articles)} articles to update")
        print("-" * 60)

        updated = 0

        for article in all_articles:
            props = article.properties
            old_slug = props.get('slug') or ''
            # A stored None would otherwise break the slicing below.
            title = props.get('title') or ''

            # Generate new slug with full title
            new_slug = generate_full_title_slug(
                title=title,
                inspection_date=props.get('inspection_date') or props.get('published_date')
            )

            updates = {'slug': new_slug}

            # Also ensure required fields
            if not props.get('city'):
                updates['city'] = 'Chicago'
            if not props.get('state'):
                updates['state'] = 'IL'
            if not props.get('establishment_name') and props.get('facility_name'):
                updates['establishment_name'] = props.get('facility_name')

            # Update article
            articles.data.update(
                uuid=article.uuid,
                properties=updates
            )

            print(f"✓ {title[:50]}...")
            print(f"  Old: /{old_slug}")
            print(f"  New: /{new_slug}")
            print()

            updated += 1

        print("=" * 60)
        print(f"✅ Updated {updated} articles with full title slugs")

        # Test the new slugs
        print("\n" + "=" * 60)
        print("TESTING NEW URLS")
        print("=" * 60)

        response = articles.query.fetch_objects(limit=3)
        for article in response.objects:
            slug = article.properties.get('slug')
            title = article.properties.get('title') or 'Unknown'

            print(f"\nArticle: {title[:50]}...")
            print(f"URL: http://160.153.178.131/_sites/cleankitchens/{slug}")
    finally:
        # Always release the connection, including on failure.
        client.close()

# Run the slug migration only when executed as a script, not on import.
if __name__ == "__main__":
    main()