#!/usr/bin/env python3
"""
Image selector for CleanKitchens articles
Matches violations to appropriate stock images
"""

import json
import os
import random
import re
from collections import defaultdict

class ImageSelector:
    """Pick a stock image for a CleanKitchens article.

    Selection is driven by keyword matching on the violation text and the
    facility name, and rotates between the images of a category so the
    least-used image is preferred.  Usage counts live in memory only and
    are reset whenever a new selector is constructed.
    """

    # Root directory holding the image sub-folders and the metadata file.
    IMAGES_DIR = "/var/www/twin-digital-media/public_html/_sites/cleankitchens/assets/images"

    # Keywords this short are too ambiguous as prefixes ('rat' -> 'rate',
    # 'raw' -> 'rawhide'), so they must match as whole words.
    _SHORT_KEYWORD_MAX_LEN = 3

    # Alt text for the summary article types (also defines which article
    # types are treated as summaries).
    _SUMMARY_ALT_TEXT = {
        'daily_summary': "Daily restaurant inspection summary report - cleankitchens.org",
        'weekly_summary': "Weekly restaurant inspection summary report - cleankitchens.org",
    }

    def __init__(self):
        self.base_url = "/assets/images"
        self.images_dir = self.IMAGES_DIR
        self.metadata_path = f"{self.images_dir}/generation_metadata.json"

        # Load metadata if exists
        self.image_metadata = self.load_metadata()

        # Track usage for rotation
        self.usage_counts = defaultdict(int)

        # Chain restaurant keywords
        self.chain_mappings = {
            'burger_chain': ['mcdonald', 'burger king', 'wendy', 'five guys', 'in-n-out', 'white castle', 'sonic'],
            'coffee_chain': ['starbucks', 'dunkin', 'peet', 'caribou', 'tim hortons', 'coffee bean'],
            'chicken_chain': ['kfc', 'popeyes', 'chick-fil-a', 'raising cane', 'church'],
            'pizza_chain': ['domino', 'pizza hut', 'papa john', 'little caesars', 'marco'],
            'mexican_chain': ['chipotle', 'taco bell', 'qdoba', 'moe southwest', 'del taco'],
            'sub_chain': ['subway', 'jimmy john', 'jersey mike', 'firehouse', 'quiznos'],
            'asian_chain': ['panda express', 'pf chang', 'pick up stix', 'pei wei'],
            'casual_chain': ['applebee', 'chili', 'olive garden', 'red lobster', 'outback']
        }

        # Violation keyword mappings
        self.violation_mappings = {
            'rodent': ['rodent', 'mouse', 'mice', 'rat', 'droppings', 'gnaw'],
            'roach': ['roach', 'cockroach', 'insect', 'bug', 'pest'],
            'temperature': ['temperature', 'cold holding', 'hot holding', 'cooling', 'danger zone'],
            'cleanliness': ['clean', 'sanitation', 'dirty', 'soil', 'grease', 'grime'],
            'mold': ['mold', 'mildew', 'fungus', 'moisture'],
            'cross_contamination': ['cross contaminat', 'raw', 'cooked', 'separate'],
            'handwashing': ['hand', 'wash', 'sink', 'soap', 'towel', 'sanitizer'],
            'structural': ['ceiling', 'floor', 'wall', 'crack', 'hole', 'damage'],
            'sewage': ['sewage', 'drain', 'backup', 'plumbing', 'waste']
            # Removed closure category to avoid libel issues
        }

    # ------------------------------------------------------------------
    # Keyword matching helpers
    # ------------------------------------------------------------------
    @classmethod
    def _keyword_pattern(cls, keyword):
        """Return a compiled regex matching *keyword* on word boundaries.

        Plain substring tests are far too greedy here: 'rat' occurs inside
        'temperature', 'refrigerator' and 'operating', 'hole' inside
        'whole', 'sonic' inside 'masonic'.  Keywords therefore only match
        at the start of a word.  Longer keywords act as prefixes (so
        'clean' still matches 'cleaning' and 'mcdonald' matches
        "mcdonald's"); very short keywords must be whole words, with an
        optional plural 's'.
        """
        escaped = re.escape(keyword)
        if len(keyword) <= cls._SHORT_KEYWORD_MAX_LEN:
            return re.compile(r"(?<!\w)" + escaped + r"s?(?!\w)")
        return re.compile(r"(?<!\w)" + escaped)

    @classmethod
    def _count_keyword_hits(cls, text_lower, keywords):
        """Count word-boundary occurrences of every keyword in *text_lower*.

        *text_lower* must already be lower-cased; the keyword lists are
        stored in lower case.
        """
        return sum(
            len(cls._keyword_pattern(keyword).findall(text_lower))
            for keyword in keywords
            if keyword
        )

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def load_metadata(self):
        """Load generated image metadata.

        Returns:
            The parsed JSON document, or None when the metadata file is
            missing, unreadable or not valid JSON.  The metadata is
            optional (get_available_images falls back to a directory
            listing), so a bad file must not prevent construction.
        """
        try:
            with open(self.metadata_path, 'r', encoding='utf-8') as f:
                return json.load(f)
        except (OSError, ValueError):
            # OSError covers a missing/unreadable file; ValueError covers
            # json.JSONDecodeError and UnicodeDecodeError.
            return None

    def identify_chain(self, restaurant_name):
        """Identify if restaurant is a chain and which type.

        Args:
            restaurant_name: Facility name; may be None or empty.

        Returns:
            The first chain type (in chain_mappings order) with a keyword
            found in the name, or None when nothing matches.
        """
        if not restaurant_name:
            return None

        name_lower = restaurant_name.lower()
        for chain_type, keywords in self.chain_mappings.items():
            if self._count_keyword_hits(name_lower, keywords):
                return chain_type

        return None

    def identify_violation_type(self, violations_text):
        """Identify primary violation type from text.

        Each category scores one point per keyword occurrence; the
        highest-scoring category wins.  Ties go to the category listed
        first in violation_mappings.

        Returns:
            A key of violation_mappings, or 'general' when the text is
            empty or no keyword matches.
        """
        if not violations_text:
            return 'general'

        violations_lower = violations_text.lower()
        scores = {
            violation_type: self._count_keyword_hits(violations_lower, keywords)
            for violation_type, keywords in self.violation_mappings.items()
        }

        # Return highest scoring violation type
        if scores:
            best_match = max(scores, key=scores.get)
            if scores[best_match] > 0:
                return best_match

        return 'general'

    def get_available_images(self, category, subfolder):
        """Get list of available image filenames for a category.

        The metadata file is consulted first; if it yields nothing, the
        sub-folder on disk is scanned for '<category>_*.jpg' files.

        Returns:
            A (possibly empty) list of filenames.
        """
        images = []

        # First check metadata; skip malformed entries instead of raising.
        metadata = self.image_metadata
        if isinstance(metadata, dict):
            for img in metadata.get('images') or []:
                if not isinstance(img, dict):
                    continue
                if (img.get('category') == category
                        and img.get('subfolder') == subfolder
                        and img.get('filename')):
                    images.append(img['filename'])

        # Fallback to directory listing
        if not images:
            dir_path = f"{self.images_dir}/{subfolder}"
            if os.path.isdir(dir_path):
                prefix = f"{category}_"
                # Sorted so the result does not depend on filesystem order.
                images = sorted(
                    name for name in os.listdir(dir_path)
                    if name.startswith(prefix) and name.endswith('.jpg')
                )

        return images

    def select_least_used_image(self, images, category):
        """Select the least used image from available options.

        Ties between equally-used images are broken uniformly at random.
        The usage count of the chosen image is incremented.

        Returns:
            The chosen filename, or None when *images* is empty.
        """
        if not images:
            return None

        counts = [self.usage_counts[f"{category}_{image}"] for image in images]
        lowest = min(counts)
        candidates = [image for image, count in zip(images, counts) if count == lowest]

        best_image = random.choice(candidates)
        self.usage_counts[f"{category}_{best_image}"] += 1
        return best_image

    def _select_from(self, category, subfolder, alt_text):
        """Build a result dict for *category*, or None if it has no images."""
        images = self.get_available_images(category, subfolder)
        if not images:
            return None

        selected = self.select_least_used_image(images, category)
        return {
            'image_url': f"{self.base_url}/{subfolder}/{selected}",
            'image_alt': alt_text,
            'category': category
        }

    def select_image_for_article(self, inspection_data):
        """
        Select appropriate stock image based on inspection data

        Summary articles use their summary images when available.
        Otherwise the priority order is:
        1. RODENT violations (TOP PRIORITY)
        2. Cuisine-specific images (if available)
        3. Closures - deliberately shown with a *general* image; there is
           no closure-specific imagery, to avoid libel issues
        4. Other specific violations
        5. Chain restaurants
        6. General violations

        A step is skipped when its category has no images available.

        Args:
            inspection_data: dict with keys:
                - facility_name: Restaurant name
                - violations: Violation text
                - is_closure: Boolean
                - article_type: 'violation', 'daily_summary', 'weekly_summary'
                - cuisine_type: Optional cuisine type

        Returns:
            dict with:
                - image_url: Full URL path to image
                - image_alt: Alt text for image
                - category: Category selected
        """
        # Check for summary articles first
        article_type = inspection_data.get('article_type', 'violation')
        if article_type in self._SUMMARY_ALT_TEXT:
            result = self._select_from(
                article_type, 'summaries', self._SUMMARY_ALT_TEXT[article_type]
            )
            if result:
                return result

        # For violation articles, check priority order
        violations_text = inspection_data.get('violations') or ''

        # PRIORITY 1: RODENT violations (TOP PRIORITY)
        if violations_text:
            rodent_keywords = self.violation_mappings.get('rodent', ())
            if self._count_keyword_hits(violations_text.lower(), rodent_keywords):
                result = self._select_from(
                    'rodent', 'violations',
                    "Restaurant health violation - rodent activity - cleankitchens.org"
                )
                if result:
                    return result

        # PRIORITY 2: Cuisine-specific images (if available)
        cuisine_type = inspection_data.get('cuisine_type')
        if cuisine_type:
            result = self._select_from(
                cuisine_type, 'cuisine',
                f"{cuisine_type.replace('_', ' ').title()} restaurant - cleankitchens.org"
            )
            if result:
                return result

        # PRIORITY 3: Skip closure images - use general instead for safety
        if inspection_data.get('is_closure'):
            # Use general images instead of closure to avoid any libel issues
            result = self._select_from(
                'general', 'violations',
                "Restaurant health inspection - cleankitchens.org"
            )
            if result:
                return result

        # PRIORITY 4: Check for other specific violations
        violation_type = self.identify_violation_type(violations_text)
        if violation_type != 'general':
            result = self._select_from(
                violation_type, 'violations',
                f"Restaurant health violation - {violation_type.replace('_', ' ')} - cleankitchens.org"
            )
            if result:
                return result

        # PRIORITY 5: Check for chain restaurants
        chain_type = self.identify_chain(inspection_data.get('facility_name', ''))
        if chain_type:
            result = self._select_from(
                chain_type, 'chains',
                f"{chain_type.replace('_', ' ').title()} restaurant inspection - cleankitchens.org"
            )
            if result:
                return result

        # PRIORITY 6: Default to general violation image
        general_alt = "Restaurant health inspection violation - cleankitchens.org"
        result = self._select_from('general', 'violations', general_alt)
        if result:
            return result

        # Ultimate fallback: no images are known at all, so point at the
        # conventional first general image.
        return {
            'image_url': f"{self.base_url}/violations/general_1.jpg",
            'image_alt': general_alt,
            'category': 'general'
        }

    def get_usage_stats(self):
        """Return a plain-dict copy of the current usage statistics."""
        return dict(self.usage_counts)


# Example usage
if __name__ == "__main__":
    selector = ImageSelector()

    # Demo inspections, each paired with the prefix printed before its
    # "Selected:" line.  The first is a hand-washing violation at an
    # independent restaurant; the second is a chain closure whose text
    # mentions rodent activity.
    demo_cases = [
        (
            "",
            {
                'facility_name': "FRANKIE'S FAST FOOD CORNER",
                'violations': "OBSERVED NO HAND WASHING SINK IN REAR DISH WASHING AREA",
                'is_closure': False,
                'article_type': 'violation'
            },
        ),
        (
            "\nClosure - ",
            {
                'facility_name': "McDonald's",
                'violations': "Rodent activity found",
                'is_closure': True,
                'article_type': 'violation'
            },
        ),
    ]

    for prefix, payload in demo_cases:
        outcome = selector.select_image_for_article(payload)
        print(f"{prefix}Selected: {outcome['image_url']}")
        print(f"Alt text: {outcome['image_alt']}")
        print(f"Category: {outcome['category']}")