#!/usr/bin/env python3
"""
Fix article routing and implement date-based slug generation
"""

import weaviate
import re
from datetime import datetime
import json

def _slugify_fragment(text):
    """Lower-case *text* and reduce it to hyphen-separated [a-z0-9] words."""
    fragment = re.sub(r'[^a-z0-9\s-]', '', text.lower())
    fragment = re.sub(r'\s+', '-', fragment)
    # Collapse runs such as "a - b" -> "a---b" and drop edge hyphens so the
    # fragment can be joined to other fragments without producing "--".
    fragment = re.sub(r'-+', '-', fragment)
    return fragment.strip('-')


def generate_date_based_slug(title, inspection_date=None, facility_name=None):
    """
    Generate a date-based slug format:
    YYYY/MM/DD/readable-title
    Example: 2025/08/16/portillos-chicago-fails-health-inspection

    Args:
        title: Article title. May be None or empty, in which case only the
            facility name (or the 'inspection-report' fallback) is used.
        inspection_date: Either a string (ISO-8601 with a 'T' separator, an
            optional trailing 'Z' is accepted; or plain 'YYYY-MM-DD') or an
            object exposing ``strftime`` (e.g. ``datetime``/``date``).
            Missing, unparseable, or unsupported values fall back to the
            current local time.
        facility_name: Optional facility name. It is prepended to the slug
            when its cleaned form is non-empty and shorter than 30 characters.

    Returns:
        The slug string ``"YYYY/MM/DD/<text>"`` with no leading, trailing,
        or repeated hyphens in ``<text>``.
    """
    # Parse the date; dt stays None when nothing usable was supplied.
    dt = None
    if inspection_date:
        if isinstance(inspection_date, str):
            try:
                if 'T' in inspection_date:
                    # Handle ISO format ('Z' is not accepted by older
                    # fromisoformat implementations, so normalise it).
                    dt = datetime.fromisoformat(inspection_date.replace('Z', '+00:00'))
                else:
                    dt = datetime.strptime(inspection_date, '%Y-%m-%d')
            except ValueError:
                # Best-effort: a malformed date must not abort slug creation.
                dt = None
        elif hasattr(inspection_date, 'strftime'):
            dt = inspection_date
    if dt is None:
        dt = datetime.now()

    # Format date part
    date_part = dt.strftime('%Y/%m/%d')

    # Create readable title part
    text_parts = []

    # Add facility name if available
    if facility_name:
        clean_facility = _slugify_fragment(facility_name)
        if clean_facility and len(clean_facility) < 30:
            text_parts.append(clean_facility)

    # Add title
    if title:
        title_clean = title.lower()

        # Remove common phrases
        for phrase in ('health inspection reveals',
                       'critical violations at',
                       'fails inspection'):
            title_clean = title_clean.replace(phrase, '')

        # Clean and limit; strip again because the 40-character cut can land
        # right after a hyphen.
        title_slug = _slugify_fragment(title_clean)[:40].strip('-')

        if title_slug:
            text_parts.append(title_slug)

    # Combine parts, falling back to a generic label when nothing survived.
    text_part = '-'.join(text_parts) if text_parts else 'inspection-report'

    # Final slug
    return f"{date_part}/{text_part}"

def test_article_accessibility(slug, base_url="http://160.153.178.131/_sites/cleankitchens", timeout=5):
    """Test if an article is accessible via its slug.

    Args:
        slug: Article slug, appended to ``base_url`` to form the request URL.
        base_url: Site root to probe; defaults to the host this script was
            written against.
        timeout: Request timeout in seconds.

    Returns:
        'success'      - HTTP 200 and the page does not look like the fallback.
        'fallback'     - HTTP 200 but the body mentions "Portillo" while the
                         slug does not.
        'error_<code>' - any non-200 HTTP status.
        'error'        - empty slug or a network-level failure.
    """
    import requests

    if not slug:
        # Without a slug the URL would point at ".../None" or the site root.
        return 'error'

    # Test direct URL
    url = f"{base_url.rstrip('/')}/{slug}"
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Connection errors, timeouts, invalid URLs, ...
        return 'error'

    if response.status_code != 200:
        return f'error_{response.status_code}'

    # Check if it's showing the real article or fallback.
    # NOTE(review): this presumes the fallback page is the Portillo's article;
    # a page mentioning "Portillo" is only suspicious when the requested slug
    # is not itself a Portillo's article. Confirm against index.php.
    if 'Portillo' in response.text and 'portillo' not in slug.lower():
        return 'fallback'
    return 'success'

def _dedupe_slug(slug, used_slugs):
    """Return *slug*, or *slug* with the first free "-N" suffix; record it in *used_slugs*."""
    candidate = slug
    counter = 1
    while candidate in used_slugs:
        candidate = f"{slug}-{counter}"
        counter += 1
    used_slugs.add(candidate)
    return candidate


def _default_image_url(category):
    """Pick the stock violation image path matching *category* (may be None)."""
    category = (category or 'general').lower()
    if 'temperature' in category:
        return '/assets/images/violations/temperature_1.jpg'
    if 'hygiene' in category:
        return '/assets/images/violations/handwashing_1.jpg'
    return '/assets/images/violations/general_1.jpg'


def main():
    """Rewrite article slugs to the date-based format and report on routing.

    Steps:
      1. Fetch up to 100 objects from the "Articles" collection.
      2. Give each one a new slug (unique within this run) and fill in
         missing city/state/establishment_name/image_url values.
      3. Probe the first three articles over HTTP via their slugs.
      4. Print routing notes and the slugs of up to 20 articles.

    The Weaviate client is always closed, even if a step raises.
    """
    fetch_limit = 100

    # Connect to Weaviate
    client = weaviate.connect_to_local()
    try:
        articles = client.collections.get("Articles")

        print("=" * 60)
        print("FIXING ARTICLE ROUTING AND SLUG GENERATION")
        print("=" * 60)

        # Get articles (capped at fetch_limit)
        response = articles.query.fetch_objects(limit=fetch_limit)

        print(f"\nFound {len(response.objects)} articles to process")
        if len(response.objects) >= fetch_limit:
            # The query is capped, so more articles may exist than were returned.
            print(f"⚠️  Fetch limit of {fetch_limit} reached; remaining articles were not processed")
        print("\nUpdating slugs to date-based format...")
        print("-" * 60)

        updated = 0
        # Slugs handed out in this run. Uniqueness is tracked by new slug, so
        # articles sharing the same (e.g. empty) old slug cannot collide.
        used_slugs = set()

        for article in response.objects:
            props = article.properties
            old_slug = props.get('slug', '')
            # A property can be present with value None, so default with "or".
            title = props.get('title') or 'Unknown'

            # Generate new date-based slug
            new_slug = generate_date_based_slug(
                title=props.get('title'),
                inspection_date=props.get('inspection_date') or props.get('published_date'),
                facility_name=props.get('facility_name')
            )

            # Make sure slug is unique by adding counter if needed
            new_slug = _dedupe_slug(new_slug, used_slugs)

            # Update the article
            updates = {'slug': new_slug}

            # Also ensure all required fields are present
            if not props.get('city'):
                updates['city'] = 'Chicago'
            if not props.get('state'):
                updates['state'] = 'IL'
            if not props.get('establishment_name') and props.get('facility_name'):
                updates['establishment_name'] = props.get('facility_name')
            if not props.get('image_url'):
                updates['image_url'] = _default_image_url(props.get('violation_category'))

            # Update article
            articles.data.update(
                uuid=article.uuid,
                properties=updates
            )

            print(f"✓ Updated: {title[:40]}...")
            print(f"  Old slug: {old_slug}")
            print(f"  New slug: {new_slug}")

            updated += 1

        print(f"\n✅ Updated {updated} articles with date-based slugs")

        # Test article accessibility
        print("\n" + "=" * 60)
        print("TESTING ARTICLE ACCESSIBILITY")
        print("=" * 60)

        # Query articles again to get updated slugs
        response = articles.query.fetch_objects(limit=5)

        print("\nTesting if articles are accessible via new slugs...")
        for article in response.objects[:3]:  # Test first 3
            slug = article.properties.get('slug')
            title = article.properties.get('title') or 'Unknown'

            print(f"\nTesting: {title[:40]}...")
            print(f"  URL: /{slug}")

            result = test_article_accessibility(slug)
            if result == 'success':
                print("  ✅ Article accessible!")
            elif result == 'fallback':
                print("  ⚠️  Showing fallback content (routing issue)")
            else:
                print(f"  ❌ Error: {result}")

        # Analyze routing configuration
        print("\n" + "=" * 60)
        print("ROUTING ANALYSIS")
        print("=" * 60)

        print("\nCurrent routing setup:")
        print("1. .htaccess: Redirects all non-file requests to index.php")
        print("2. index.php: Checks URL path and queries Weaviate")
        print("3. functions_live.php: getArticleBySlugFromDB() fetches article")

        print("\nPotential issues identified:")
        print("• Date-based slugs (2025/08/16/title) might be interpreted as directories")
        print("• Need to handle both old and new slug formats")
        print("• Must ensure getArticleBySlugFromDB handles full path correctly")

        print("\nRecommendations:")
        print("1. ✅ Keep .htaccess as-is (it's working correctly)")
        print("2. ✅ Update index.php to handle date-based paths")
        print("3. ✅ Ensure Weaviate queries use full slug path")

        # Display all article URLs for verification
        print("\n" + "=" * 60)
        print("ALL ARTICLE URLS")
        print("=" * 60)

        response = articles.query.fetch_objects(limit=20)
        for article in response.objects:
            print(f"/{article.properties.get('slug')}")
    finally:
        # Release the connection even when a query or update fails.
        client.close()

    print("\n" + "=" * 60)
    print("NEXT STEPS:")
    print("1. Update index.php to properly handle date-based slugs")
    print("2. Test article display on homepage")
    print("3. Verify individual article pages work")
    print("=" * 60)

# Run the slug migration only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()