#!/usr/bin/env python3
"""
Background consolidation for remaining ChicagoTemp cases
Uses Claude Haiku to identify and consolidate duplicate inspection records
"""

import weaviate
import os
from datetime import datetime
from collections import defaultdict
from anthropic import Anthropic
from dotenv import load_dotenv
import json
import time

# Load environment variables
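# (the .env file is expected to provide ANTHROPIC_API_KEY for the Haiku client)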
load_dotenv('/home/chris/.env')

class BackgroundConsolidator:
    def __init__(self):
        # Connect to Weaviate
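        # (connect_to_local() assumes a default local instance: HTTP on 8080, gRPC on 50051)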
        self.client = weaviate.connect_to_local()
        self.collection = self.client.collections.get("ChicagoTemp")
        
        # Initialize Anthropic client with Haiku
        self.anthropic = Anthropic(api_key=os.getenv('ANTHROPIC_API_KEY'))
        
        self.consolidated = 0
        self.skipped = 0
        self.errors = 0
    
    def find_consolidation_candidates(self):
        """Find facility+date combinations with multiple records"""
        print("🔍 Scanning for consolidation candidates...")
        
        facility_date_combos = []
        
        # Page through the collection with offset-based pagination.
        # Note: offset queries are capped by Weaviate's QUERY_MAXIMUM_RESULTS
        # setting (10,000 by default), so this scan may stop early unless that
        # limit has been raised on the server.
        for batch_offset in range(0, 50000, 100):
            try:
                result = self.collection.query.fetch_objects(
                    limit=100,
                    offset=batch_offset,
                    return_properties=['inspection_id', 'facility_name', 'inspection_date', 'address', 'is_combined']
                )
                
                if not result.objects:
                    break
                    
                for obj in result.objects:
                    props = obj.properties
                    
                    # Skip already consolidated records
                    if props.get('is_combined', False):
                        continue
                        
                    # Missing properties can come back as None, so guard before stripping
                    facility = (props.get('facility_name') or '').strip()
                    date = (props.get('inspection_date') or '').strip()
                    address = (props.get('address') or '').strip()
                    inspection_id = props.get('inspection_id') or ''
                    
                    if facility and date and address:
                        key = f'{facility}|{address}|{date}'
                        facility_date_combos.append({
                            'key': key,
                            'facility': facility,
                            'address': address,
                            'date': date,
                            'inspection_id': inspection_id,
                            'uuid': str(obj.uuid)
                        })
                        
            except Exception as e:
                print(f"Error at offset {batch_offset}: {e}")
                break
        
        print(f"📊 Scanned {len(facility_date_combos)} records")
        
        # Group by facility+address+date
        grouped = defaultdict(list)
        for combo in facility_date_combos:
            grouped[combo['key']].append(combo)
        
        # Find candidates
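        # (any facility+address+date key that maps to more than one record)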
        candidates = {k: v for k, v in grouped.items() if len(v) > 1}
        
        print(f"✅ Found {len(candidates)} consolidation candidates")
        return candidates
    
    def analyze_with_haiku(self, records):
        """Use Claude Haiku to analyze if records should be consolidated"""
        
        # Create analysis prompt
        records_info = []
        for i, record in enumerate(records, 1):
            records_info.append(f"Record {i}: ID {record['inspection_id']} - {record['facility']} at {record['address']} on {record['date']}")
        
        prompt = f"""Analyze these inspection records and determine if they should be consolidated.

RECORDS:
{chr(10).join(records_info)}

Rules for consolidation:
1. CONSOLIDATE if: Same facility + Same address + Same date + Different inspection IDs
2. DO NOT consolidate if: Different addresses (separate locations)
3. DO NOT consolidate if: Already appear to be consolidated

Return ONLY a JSON response:
{{
  "should_consolidate": true/false,
  "reason": "brief explanation",
  "keep_record": 1,
  "delete_records": [2, 3]
}}"""

        try:
            response = self.anthropic.messages.create(
                model="claude-3-haiku-20240307",
                max_tokens=200,
                temperature=0.1,
                messages=[{"role": "user", "content": prompt}]
            )
            
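            # messages.create returns a list of content blocks; take the text of the first one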
            response_text = response.content[0].text.strip()
            
            # Parse the JSON reply; tolerate code fences or extra text around the object
            start = response_text.find('{')
            end = response_text.rfind('}')
            if start != -1 and end > start:
                return json.loads(response_text[start:end + 1])
            return {"should_consolidate": False, "reason": "Invalid response format"}
                
        except Exception as e:
            print(f"❌ Haiku analysis error: {e}")
            return {"should_consolidate": False, "reason": f"Analysis error: {e}"}
    
    def consolidate_records(self, records, analysis):
        """Perform the actual consolidation"""
        try:
            keep_idx = analysis['keep_record'] - 1  # Convert to 0-based index
            delete_indices = [i - 1 for i in analysis['delete_records']]
            
            keep_record = records[keep_idx]
            delete_records = [records[i] for i in delete_indices]
            
            # Get full record data (fetch_object_by_id returns None if the object is gone)
            keep_obj = self.collection.query.fetch_object_by_id(keep_record['uuid'])
            if keep_obj is None:
                raise ValueError(f"Record {keep_record['uuid']} no longer exists in the collection")
            keep_data = keep_obj.properties
            
            # Build consolidated raw_data
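            # (violation text from every duplicate is preserved, labelled by its source record)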
            violation_records = []
            inspection_ids = [keep_record['inspection_id']]
            
            # Add keep record violations
            violation_records.append(f"violation_record_1:{keep_data.get('violations', '')}")
            
            # Add delete record violations
            for i, del_record in enumerate(delete_records, 2):
                del_obj = self.collection.query.fetch_object_by_id(del_record['uuid'])
                del_data = del_obj.properties
                violation_records.append(f"violation_record_{i}:{del_data.get('violations', '')}")
                inspection_ids.append(del_record['inspection_id'])
            
            # Create consolidated raw_data
            combined_raw_data = (
                f"facility_name:{keep_data.get('facility_name', '')},"
                f"address:{keep_data.get('address', '')},"
                f"inspection_date:{keep_data.get('inspection_date', '')},"
                f"inspection_ids:{','.join(inspection_ids)},"
                f"{','.join(violation_records)},"
                f"total_violations:{len(records)},"
                f"combined_from_rows:{len(records)},"
                f"consolidation_date:{datetime.now().isoformat()},"
                f"consolidation_method:haiku_auto"
            )
            
            # Update keep record
            self.collection.data.update(
                uuid=keep_record['uuid'],
                properties={
                    'raw_data': combined_raw_data,
                    'is_combined': True,
                    'combined_from_rows': inspection_ids,
                    'violation_count': len(records),
                    'consolidated_at': datetime.now().isoformat()
                }
            )
            
            # Delete other records
            for del_record in delete_records:
                self.collection.data.delete_by_id(del_record['uuid'])
            
            print(f"✅ Consolidated {keep_record['facility']} on {keep_record['date']}")
            print(f"   Kept: {keep_record['inspection_id']}")
            print(f"   Deleted: {[r['inspection_id'] for r in delete_records]}")
            
            self.consolidated += 1
            return True
            
        except Exception as e:
            print(f"❌ Consolidation error: {e}")
            self.errors += 1
            return False
    
    def run_consolidation(self):
        """Main consolidation process"""
        print("="*60)
        print("BACKGROUND CONSOLIDATION - HAIKU POWERED")
        print("="*60)
        
        # Find candidates
        candidates = self.find_consolidation_candidates()
        
        if not candidates:
            print("✅ No consolidation candidates found")
            return
        
        print(f"\n🔄 Processing {len(candidates)} candidates...")
        
        for i, (key, records) in enumerate(candidates.items(), 1):
            facility, address, date = key.split('|')
            
            print(f"\n[{i}/{len(candidates)}] Analyzing: {facility} on {date}")
            
            # Analyze with Haiku
            analysis = self.analyze_with_haiku(records)
            
            if analysis.get('should_consolidate'):
                print(f"✅ Haiku says: {analysis.get('reason', '')}")
                # consolidate_records() already increments self.errors on failure,
                # so no extra bookkeeping is needed here
                self.consolidate_records(records, analysis)
            else:
                print(f"⏭️  Haiku says: {analysis.get('reason', '')}")
                self.skipped += 1
            
            # Small delay to avoid overwhelming API
            time.sleep(0.5)
        
        print("\n" + "="*60)
        print("CONSOLIDATION COMPLETE")
        print("="*60)
        print(f"✅ Consolidated: {self.consolidated} cases")
        print(f"⏭️  Skipped: {self.skipped} cases")
        print(f"❌ Errors: {self.errors} cases")
        print("="*60)

if __name__ == "__main__":
    consolidator = BackgroundConsolidator()
    try:
        consolidator.run_consolidation()
    finally:
        consolidator.client.close()
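
# Typical background invocation (assuming this file is saved as background_consolidation.py):
#   nohup python3 background_consolidation.py > consolidation.log 2>&1 &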