
    h                         d Z ddlZddlZddlZddlZddlZddlZddlmZmZ ddl	m
Z
 ddlZddlmZ ddlmZ ddlZddlmZ  ed       dd	lmZ  G d
 d      Zd Zedk(  r e        yy)z

Comprehensive CleanKitchens Content Processor
Handles: Bulk upload, Daily processing, Pattern story generation
Uses Claude Haiku for cost-effective content generation (~$0.001/article)
    N)datetime	timedelta)Path)Filter)	Anthropic)load_dotenvz/home/chris/.env)violation_lookupc                      e Zd Zd*dZd Zd Zd Zd+dZd Zd Z	d Z
d	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd,dZd Z d  Z!d! Z"d" Z#d# Z$d$ Z%d% Z&d& Z'd' Z(d( Z)d) Z*y)-CleanKitchensProcessorc                    ||S t        |t              rDt        j                  |      r|S |t	        |      k(  rt        t	        |            S t        |      S t        |      j                         rt        |      j                         S |S )z>Safely convert value to string, handling NaN, None, and floats)
isinstancefloatpdisnaintstrstrip)selfvaluedefaults      j/var/www/twin-digital-media/public_html/_sites/cleankitchens/production/scripts/comprehensive_processor.py	_safe_strz CleanKitchensProcessor._safe_str   so    =NeU#wwu~&+s5z&93s5z?Is5zI%(Z%5%5%7s5z!DWD    c                 "   t        j                         | _        t        t	        j
                  d            | _        d| _        d| _        d| _	        | j                  j                  j                  d      | _        | j                  j                  j                  d      | _        | j                  j                  j                  d      | _        | j                  j                  j                  d	      | _        | j!                         | _        y )
NANTHROPIC_API_KEY)api_keyr   g        gMbP?ArticlesRawInspectionsTagPagesPatternStories)weaviateconnect_to_localweaviate_clientr   osgetenvanthropic_clientarticles_generated
total_costcost_per_articlecollectionsgetarticlesraw_inspections	tag_pagespattern_stories_build_cached_system_promptcached_system_promptr   s    r   __init__zCleanKitchensProcessor.__init__(   s    '88: )"))<O2P Q #$ % ,,88<<ZH#33??CCDTU--99==jI#33??CCDTU %)$D$D$F!r   c                 R    t        | d      r| j                  j                          y y )Nr#   )hasattrr#   closer2   s    r   __del__zCleanKitchensProcessor.__del__:   s$    4*+  &&( ,r   c                      y)z4Build system prompt that can be cached to save costsag  You are a professional food safety journalist writing for CleanKitchens.org. Write factual, neutral news articles about restaurant health inspections.

CRITICAL SAFETY RULES:
- Use ONLY government data provided
- NO speculation, fake quotes, or fabricated information  
- Use neutral, factual language: "received citations for", "violations found", "failed inspection"
- NEVER say "was closed" or "shut down" - use "cited for violations" or "did not meet standards"
- Include educational angle with government resources
- Hyperlink government sources when referencing data
- NO libel risk - stick to official inspection results only

REQUIRED ARTICLE STRUCTURE:
1. Headline (60-80 characters, factual, include restaurant name and violation type)
   - END titles with inspection date in MM/DD/YY format (e.g., "Restaurant Name Cited for Violations 08/15/25")
2. Lead paragraph (what happened, when, where - just the facts)
3. Violation details (specific citations from inspection) 
4. Educational context (what these violations mean for food safety)
5. Government resources (link to health department, FDA guidelines)
6. Local context (area description, nearby landmarks if relevant)
7. Historical context (if pattern of violations exists)

GOVERNMENT SOURCES TO REFERENCE:
- Local health department: [include link to original inspection data]
- FDA Food Code: https://www.fda.gov/food/fda-food-code/food-code-2022
- CDC Food Safety: https://www.cdc.gov/foodsafety/
- State health department guidelines

FORMAT RESPONSE AS JSON:
{
    "title": "headline here",
    "content": "full article content with <a> tags for links",
    "excerpt": "brief summary (150 chars)",
    "meta_description": "SEO description (160 chars)",
    "image_category": "violation type (e.g., 'temperature', 'cleanliness', 'equipment')"
}

Always maintain journalistic integrity while educating readers about food safety. r2   s    r   r0   z2CleanKitchensProcessor._build_cached_system_prompt>   s    $Ur   c           	         t        d|        t        d|        ddl}t        d        |j                  |      }g }|j                         D ]9  \  }}| j	                  |      }| j                  |      r)|j                  |       ; t        dt        |       d       | j                  |      }	d}
|	j                         D ]  \  }}t        d|        g }g }|D ]6  }| j                  |      r|j                  |       &|j                  |       8 |D ]|  }	 t        d	|j                  d
d              | j                  |      }| j                  |      }| j                  |      }| j                  |      }| j!                  ||       |
dz  }
~ t        |      dk\  r	 t        dt        |       d       g }|D ]$  }| j                  |      }|j                  |       & | j%                  ||      }| j                  |      }|D ]  }| j!                  ||        |
dz  }
t'        j(                  d       |
dz  dk(  st        d|
 d| j*                  d        t        d       t        d|
        t        d| j*                  d       y# t"        $ r}t        d|        Y d}~d}~ww xY w# t"        $ r}t        d|        Y d}~d}~ww xY w)z
        Process large CSV file of historical inspections
        Groups by date: individual stories for failures, group stories for passes
        u*   🔄 Starting bulk upload processing from zBatch size: r   Nu   📖 Reading CSV file...u   📊 Found z unique inspectionsu!   
📅 Processing inspections for u   ❌ Processing failure: establishment_nameUnknown   u   ❌ Error processing failure:    u   ✅ Processing z passes as group storyu#   ❌ Error processing group passes: g      ?2   u   💾 Checkpoint: z& articles generated, estimated cost: $z.2fu   
🎉 Bulk upload complete!zTotal articles: zTotal cost: $)printpandasread_csviterrows_row_to_inspection_data_is_duplicateappendlen_group_inspections_by_dateitems_is_passing_inspectionr+   _save_raw_inspection_enrich_with_local_data_generate_article_haiku_save_article_update_raw_inspection	Exception"_generate_group_pass_article_haikutimesleepr(   )r   csv_file_path
batch_sizer   dfall_inspections_rowinspection_datagrouped_by_datetotal_processedinspection_datedate_inspectionspassesfailures
inspectionfailureraw_idenriched_dataarticle_data
article_ideraw_idspass_inspectiongroup_articles                            r   process_bulk_uploadz*CleanKitchensProcessor.process_bulk_uploadj   s6   
 	:=/JKZL)* 	()R[[' kkm 	8FAs"::3?O%%o6&&7	8
 	C011DEF 99/J1@1F1F1H E	x-O-66GHI FH. 0
..z:MM*-OOJ/	0 $ 4W[[AUW`5a4bcd "66w?F %)$@$@$IM $(#?#?#NL "&!3!3L!AJ//
C#q(O#0 6{aOCK=8NOP !G+1 /!%!:!:?!Kv./
 %)$K$KFTc$dM "&!3!3M!BJ #* H33FJGH $q(O JJsO #q()/)::`aeapapqt`uvwKE	xN 	,.  123dooc234Q ! :1#>?6 ! ?sCDs2   !A9J +BK 	K)J==K	K'K""K'c                    t        d       | j                         }|st        d       yt        dt        |       d       |D ]  }	 | j                  |      r| j	                  |      }| j                  |      }| j                  |      }| j                  |      }| j                  ||       | j                  ||       | j                  |       t        d|d   dd  d	        y# t        $ r}t        d
|        Y d}~d}~ww xY w)z;
        Check for new violations and process them
        u)   🔍 Checking for new daily violations...u   📭 No new violations foundNu   📥 Found z new violations to processu   ✅ New article: titler?   ...u&   ❌ Error processing daily violation: )r@   _download_latest_violationsrG   rE   rK   rL   rM   rN   rO   _auto_tag_article_check_pattern_triggersrP   )r   new_dataviolation_datarc   rd   re   rf   rg   s           r   process_daily_violationsz/CleanKitchensProcessor.process_daily_violations   s%    	9: 33501CM?*DEF& 	N%%n5 22>B !% < <^ L  $;;MJ "//=
 ++FJ? &&z<@,,\:),w*?*D)ESIJ3	6  >qcBCs   C+BC++	D4DDc                 2   t        d       | j                         }|D ]S  }	 |d   dk\  rHt        d|d           | j                  |      }| j                  |       t        d|d   dd	  d
       U y# t        $ r}t        d|        Y d}~sd}~ww xY w)zE
        Analyze patterns and generate investigative stories
        u/   🔍 Analyzing patterns for story generation...significance_scoreP   u   📝 Generating pattern story: pattern_typeu   ✅ Pattern story created: rm   Nr?   rn   $   ❌ Error generating pattern story: )r@   _detect_patterns_generate_pattern_story_haiku_save_pattern_storyrP   )r   patternspattern
story_datarg   s        r   generate_pattern_storiesz/CleanKitchensProcessor.generate_pattern_stories  s     	?@((* 	G/0B6;GN<S;TUV "&!C!CG!LJ ,,Z87
78KCR8P7QQTUV	  <QC@As   AA55	B>BBc           	         | j                  |      }	 | j                  j                  j                  ddd| j                  d|dg      }|j
                  d   j                  }| j                  ||      }| xj                  dz  c_        | xj                  | j                  z  c_	        |S # t        $ r}t        d	|         d
}~ww xY w)zNGenerate article using Claude Haiku with cached system prompt for cost savingsclaude-3-haiku-20240307  333333?userrolecontent)model
max_tokenstemperaturesystemmessagesr   r=   u)   ❌ Error generating article with Haiku: N)_build_data_promptr&   r   creater1   r   text_parse_article_responser'   r(   r)   rP   r@   )r   rZ   data_promptresponser   re   rg   s          r   rM   z.CleanKitchensProcessor._generate_article_haiku$  s     --o>	,,55<</00"*  = 	H &&q)..G  77QL ##q(#OOt444O 	=aSAB	s   BB, ,	C	5CC	c                    | j                  |      }| j                  |j                  dd            }t        j                  |      }t        j
                  |      }dj                  g d|j                  dd       d|j                  dd       d|j                  d	d       d|j                  d
d       d|j                  dd       d|j                  dd       d|j                  dd       d|j                  dd       d|j                  dd       d| d|d    d|d    d|d    d|d    d|d    d | d!      S )"z<Build data-specific prompt for use with cached system prompt
violations zINSPECTION DATA:
Restaurant: r;   r<   

Address: address, citystate
Inspection Date: r]   
Inspection ID: inspection_id

Results: results
Violations: None reported
Risk Level: 
risk_level

VIOLATION EXPLANATIONS:
)

VIOLATION SUMMARY:
- Total violations: total_violations
- Critical violations: priority_count
- Serious violations: priority_foundation_count  
- Minor violations: 
core_count
- Worst severity level: worst_severity

LOCAL CONTEXT:
zT

Write a factual news article about this inspection following the guidelines above._format_local_contextr   r+   r	   explain_violations_for_articleget_violation_summaryjoin)r   datalocal_contextviolations_textviolation_explanationsviolation_summarys         r   r   z)CleanKitchensProcessor._build_data_promptE  sy    2248 ..,)CD!1!P!PQ`!a,BB?SV V  VXX*I67V8
V
((9i
(	)V)+V,0HHVY,G+HVHJVKO88T[]fKgJhViV ((,i89V:V )45	V6
	V
 ((9i
(	)V
*V XXlO45V6V XXlI./V0V  VV ''9:;V<V **:;<V=V ))DEFVGV  '|45!V 6!V" ++;<=#V">#V( )V(S)V 	Vr   c                    | j                  |      }| j                  |j                  dd            }t        j                  |      }t        j
                  |      }dj                  g d|j                  dd       d|j                  dd       d|j                  d	d       d|j                  d
d       d|j                  dd       d|j                  dd       d|j                  dd       d|j                  dd       d|j                  dd       d| d|d    d|d    d|d    d|d    d|d    d | d!d"d#d$      }|S )%z1Build comprehensive prompt for article generationr   r   a  You are a professional food safety journalist writing for CleanKitchens.org. Write a factual, neutral news article about this restaurant health inspection.

CRITICAL SAFETY RULES:
- Use ONLY government data provided
- NO speculation, fake quotes, or fabricated information
- Use neutral, factual language: "received citations for", "violations found", "failed inspection"
- NEVER say "was closed" or "shut down" - use "cited for violations" or "did not meet standards"
- Include educational angle with government resources
- Hyperlink government sources when referencing data
- NO libel risk - stick to official inspection results only

INSPECTION DATA:
Restaurant: r;   r<   r   r   r   r   r   r   r]   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   a  

REQUIRED ARTICLE STRUCTURE:
1. Headline (60-80 characters, factual, include restaurant name and violation type)
2. Lead paragraph (what happened, when, where - just the facts)
3. Violation details (specific citations from inspection)
4. Educational context (what these violations mean for food safety)
5. Government resources (link to health department, FDA guidelines)
6. Local context (area description, nearby landmarks if relevant)
7. Historical context (if pattern of violations exists)

GOVERNMENT SOURCES TO REFERENCE:
- Local health department: [include link to original inspection data]
- FDA Food Code: https://www.fda.gov/food/fda-food-code/food-code-2022
- CDC Food Safety: https://www.cdc.gov/foodsafety/
- State health department guidelines

FORMAT RESPONSE AS JSON:
rm   a   "headline here",
    "content": "full article content with <a> tags for links",
    "excerpt": "brief summary (150 chars)",
    "meta_description": "SEO description (160 chars)",
    "meta_keywords": "keyword1, keyword2, keyword3",
    "internal_links": ["url1", "url2"],
    "government_citations": ["gov_url1", "gov_url2"],
    "educational_angle": "key food safety lesson",
    "auto_tags": ["tag1", "tag2", "tag3"]
z

Write the article now:r   )r   r   r   r   r   r   prompts          r   _build_article_promptz,CleanKitchensProcessor._build_article_prompth  s    2248 ..,)CD!1!P!PQ`!a,BB?S= =  = XX*I67=8
= ((9i
(	)= *,= -1HHVY,G+H= IK= LP88T[]fKgJh=i= ((,i89=:= )45=6
=  ((9i
(	)!= *!=" XXlO45#="6#=$ XXlI./%=$0%=*  +=*+=0 ''9:;1=0<1=2 **:;<3=2=3=4 ))DEF5=4G5=6 '|457=667=8 ++;<=9=8>9=> ?=>?=d  	
c=vw=~ r   c                    |d   }d|d    d|d    d|d    d|d	    d
|j                  dd       d| j                  |       dddt        j                  |D cg c]  }|d   	 c}       dd}	 | j                  j
                  j                  dddd|dg      }|j                  d   j                  }t        j                  |      }|j                  |d   |d   |d   t        j                         j                         dd       | xj                  dz  c_        |S c c}w # t        $ r}t!        d|         d }~ww xY w)!z0Generate investigative pattern story using Haikur,   aY  You are an investigative food safety journalist writing for CleanKitchens.org. Write a data-driven investigative article about a pattern detected in restaurant inspections.

CRITICAL SAFETY RULES:
- Use ONLY verified government inspection data
- NO speculation about causes or blame
- Focus on pattern analysis and data trends
- Use neutral language: "data shows", "inspections reveal", "pattern indicates"
- Include educational resources about food safety
- NEVER make accusations - just report the factual pattern
- Encourage readers to "learn more" and "stay informed"

PATTERN DATA:
Pattern Type: rx   z
Total Articles: article_countz
Date Range: 
date_rangez
Significance Score: rv   z
Location: locationzMultiple locationsz

ARTICLES IN PATTERN:
a)  

REQUIRED STRUCTURE:
1. Investigative headline (pattern-focused, factual)
2. Lead paragraph (what pattern was found, significance)
3. Data analysis (numbers, trends, timeframe)
4. Individual case examples (3-4 specific inspections)
5. Educational context (what this means for food safety)
6. Government resources and data sources
7. Call to action (encouraging food safety awareness)

GOVERNMENT SOURCES TO CITE:
- Health department inspection database
- FDA resources on food safety
- CDC guidelines
- Local health department reports

FORMAT AS JSON:
rm   a0   "investigative headline",
    "content": "full investigative article with links",
    "subtitle": "compelling subtitle",
    "pattern_summary": "brief pattern description",
    "data_points": ["key statistic 1", "key statistic 2"],
    "government_citations": ["source1", "source2"],
    "article_ids": idzg,
    "meta_description": "SEO description",
    "significance_level": "High/Medium/Low based on data"
z&

Write the investigative article now:r   i  g?r   r   r   r   r   r   r   
pattern_idT)r   rx   r   published_dateauto_generatedgMb`?ry   N)r+   _format_articles_for_promptjsondumpsr&   r   r   r   r   loadsupdater   now	isoformatr(   rP   r@   )	r   pattern_datar,   ar   r   r   r   rg   s	            r   r{   z4CleanKitchensProcessor._generate_pattern_story_haiku  s     
+ N+, -o./ 0,'( )!"678 9J(<=
> ? !!(+ , -&   JJ:A$:;< =	
%_1(f	,,55<</#)f=>	 = H &&q)..GG,J *<8 ,^ <!-o!>"*,,.":":"<"&  OOu$O;  ;>  	8<=	s   D%&3B1D* *	E3EEc                    |j                  dd      }|j                  dd      }|j                  d      s(| j                  | d|       }|r|d   |d<   |d   |d<   |j                  | j                  |      | j	                  |      | j                  |      | j                  |      | j                  |      | j                  |      | j                  |      d	       |S )
z2Enrich inspection data with local context metadatar   r   r   latituder   latlng	longitude)nearby_landmarksnearby_transitnearby_schoolsnearby_attractionsarea_demographicsneighborhooddistrict)
r+   _geocode_addressr   _get_nearby_landmarks_get_nearby_transit_get_nearby_schools_get_nearby_attractions_get_area_demographics_determine_neighborhood_determine_district)r   rZ   r   r   coordss        r   rL   z.CleanKitchensProcessor._enrich_with_local_data  s     "%%i4""62. "":.**gYb+?@F.4Um
+/5e}, 	 $ : :? K"66G"66G"&">">"O!%!<!<_!M 88I00A 
 	 r   c                 4   | j                  |j                  dd            j                         }| j                  |j                  dd            j                         }g dddgddgg d	d
}|j                         D ]  \  }}||v s|dd c S  d|v rdgS g S )z&Get nearby landmarks for local contextr   r   r   )Millennium ParkzArt InstitutezWillis TowerzLincoln Park ZoozNorth Avenue BeachzWicker ParkBucktown)	Navy PierzChicago RiverzMagnificent Milelooplincoln parkwicker parkdowntownN   chicagozDowntown Chicagor   r+   lowerrI   )r   r   r   r   chicago_landmarksarea	landmarkss          r   r   z,CleanKitchensProcessor._get_nearby_landmarks&  s     ~~dhhvr2399;~~dhh~r&BCIIK I/1EF):6J	
  1668 	%OD)|# !}$	% (1D'8"#@b@r   c                     | j                  |j                  dd            j                         }g dddgdgg dd}|j                         D ]  \  }}||v s|dd	 c S  g S )
z!Get nearby public transit optionsr   r   )Red Line	Blue Linez
Green Liner   z
Brown Liner   r   Nr   r   )r   r   r   chicago_transitr   transits         r   r   z*CleanKitchensProcessor._get_nearby_transit:  s{     ~~dhh~r&BCIIK <'6'=?	
 -224 	#MD'|#r{"	# 	r   c                     g S )z%Get nearby schools for family contextr9   )r   r   s     r   r   z*CleanKitchensProcessor._get_nearby_schoolsL  	     	r   c                 |    | j                  |j                  dd            j                         }d|v sd|v rddgS g S )zGet nearby tourist attractionsr   r   r   r   r   r   r   r+   r   r   r   r   s      r   r   z.CleanKitchensProcessor._get_nearby_attractionsQ  sF     ~~dhh~r&BCIIK%<)?!233	r   c                 ~    | j                  |j                  dd            j                         }d|v sd|v ryd|v ryy)	z Get area demographic descriptionr   r   r   r   z&high-traffic business and tourist arear   z'popular residential and dining districtzlocal neighborhoodr   r   s      r   r   z-CleanKitchensProcessor._get_area_demographics[  sD    ~~dhh~r&BCIIK%<)?;|+<#r   c                 N   g }t               }|D ]q  }|j                  dd      }|j                  dd      }|j                  d|j                  dd            }|j                  d| d| d       |j                  |       s d	j	                  t        |      d
d       }	d| dt        |       d|	 dt        d      j	                  |       dddt        |       dd}
	 | j                  j                  j                  dddd|
dg      }|j                  d   j                  }| j                  ||t        |       d|rt        |      d   nddd | d!t        |       d"d#      }d	j	                  |d
d$ D cg c]  }|j                  dd%       c}      d%d"|rt        |      d   nd|j                  d&d%      |j                  dd%      |d'd(}| j                  |      }|j                  d)       |j                  d*t        |      |D cg c]  }|j                  d       c}|D cg c]  }|j                  d+       c}|d%d"d%d'd,	       | xj                   | j"                  z  c_        |S c c}w c c}w c c}w # t$        $ r}t'        d-|         d
}~ww xY w).zHGenerate positive group article for restaurants that passed on same dater;   r<   r   r   r   -  ()r   N   aP  You are a positive food safety journalist writing for CleanKitchens.org. Write an uplifting news article about restaurants that passed their health inspections on the same day.

CRITICAL SAFETY RULES:
- Use ONLY government inspection data provided
- Focus on POSITIVE food safety news
- Use encouraging language: "successfully passed", "maintained standards", "demonstrated compliance"
- Include educational angle about what passing means
- Hyperlink government sources and food safety resources
- NO speculation - stick to factual inspection results

GROUP INSPECTION DATA:
Inspection Date: z
Number of Restaurants: z
Area: z

RESTAURANTS THAT PASSED:

   a  

ARTICLE FOCUS:
- Celebrate successful food safety compliance
- Explain what it means to pass inspection
- Educational value about food safety standards
- Encourage consumer confidence in these establishments
- Link to government food safety resources

REQUIRED STRUCTURE:
1. Positive headline (celebrating successful inspections)
   - END titles with inspection date in MM/DD/YY format (e.g., "Chicago Restaurants Shine with Successful Health Inspections 08/15/25")
2. Lead paragraph (X restaurants passed inspections on DATE)
3. List of establishments with addresses
4. Educational context (what passing inspection means)
5. Food safety standards explanation
6. Government resources and guidelines
7. Encouragement for dining confidence

GOVERNMENT SOURCES TO REFERENCE:
- Local health department inspection standards
- FDA Food Code: https://www.fda.gov/food/fda-food-code/food-code-2022
- CDC Food Safety guidelines: https://www.cdc.gov/foodsafety/

FORMAT RESPONSE AS JSON:
rm   ak   "positive headline celebrating passes",
    "content": "full positive article content with <a> tags for links",
    "excerpt": "brief positive summary (150 chars)",
    "meta_description": "SEO description highlighting successful inspections (160 chars)",
    "meta_keywords": "food safety, passed inspection, clean restaurants",
    "internal_links": ["url1", "url2"],
    "government_citations": ["gov_url1", "gov_url2"],
    "educational_angle": "key food safety compliance lesson",
    "auto_tags": ["passed-inspection", "food-safety-success", "chicago"],
    "article_type": "group_pass",
    "restaurant_count": 
z!

Write the positive article now:r   r   r   r   r   r   r   z RestaurantsChicagoILgroup_rX   Pass)r]   r;   r   r   r   r   r>   r   r   
restaurant)r;   r   r   r   r   rm   r]   establishment_typez
group-pass
group_passr   )	article_typerestaurant_countestablishmentsgroup_inspection_ids	auto_tagsr   r   r   r  u)   ❌ Error generating group pass article: )setr+   rF   addr   listrG   chrr&   r   r   r   r   r   rp   r   r(   r)   rP   r@   )r   r_   r]   restaurant_listneighborhoodsra   namer   r   area_descriptionr   r   r   re   p
group_datar  rg   s                     r   rQ   z9CleanKitchensProcessor._generate_group_pass_article_haikuf  s      	,J>>"6	BD nnY	:G%>>.*..QZ:[\L""RvRy#:;l+	,  99T-%8!%<= "" #F} %  Ro   4  
 V & k7#r3	,,55<</#)f=>	 = H &&q)..G77#2),V\&B2?]+A.Y#)/):!CK=!I!B L '+iiZ`acbcZd0eUV7KR1P0e&f !2?]+A.Y'++Ir:%))'26#2&2	J ..z:I\*  ,$'KHN"O1155)=#>"OIO(PA)?(P& ! &2
! 
 OOt444O7 1f  #P(P  	=aSAB	sD   "BJ >I8A<J I=+J 1J	.J 8J 	J$JJ$c                 n   ddl m}  |t              }|D ]  }|j                  dd      }d}||   D ]M  }|j                  d      |j                  d      k(  s'|j                  d      |j                  d      k(  sK|} n |r| j	                  ||       ||   j                  |        t        |      S )z0Group inspections by date and combine duplicatesr   )defaultdictr]   unknownNr;   r   )r*   r  r  r+   _combine_duplicate_inspectionsrF   dict)r   inspectionsr  groupedra   date_keyexistingexisting_inspections           r   rH   z1CleanKitchensProcessor._group_inspections_by_date  s    +d#% 	5J!~~&7CH H'.x'8 #'++,@AZ^^ThEii'++I6*..:SS2H	 33HjI !((4!	5$ G}r   c                 p   | j                  |j                  dd            }| j                  |j                  dd            }|rc||vr_t        j                  |      st	        |      nd}t        j                  |      st	        |      nd}| d| j                  d      }||d<   | j                  |j                  dd            j                         }| j                  |j                  dd            j                         g d}	t        fd|	D              r|j                  d      |d<   |j                  d|j                  d      g      }
|j                  d      |
vr |
j                  |j                  d             |
|d<   t        d	|j                  d
       dt        |
       d       y)z;Combine duplicate inspection entries for same location/dater   r   z; r   )failconditionalout of business	not readyc              3   &   K   | ]  }|v  
 y wNr9   ).0	indicatorduplicate_results     r   	<genexpr>zHCleanKitchensProcessor._combine_duplicate_inspections.<locals>.<genexpr>  s     Qy,,Q   combined_inspection_idsr   u   🔗 Combined duplicate: r;   r  z	 entries)N)r   r+   r   r   r   r   r   anyrF   r@   rG   )r   r%  	duplicateexisting_violationsduplicate_violationsexisting_strduplicate_strcombined_violationsexisting_resultfailure_indicatorsexisting_idsr0  s              @r   r   z5CleanKitchensProcessor._combine_duplicate_inspections  s    #nnX\\,-KL#~~immL".MN$8@S$S;=77CV;W323]_L=?WWEY=ZC 45`bM%1N"]O"D"J"J4"P%8H\" ..i)DEKKM>>)--	2*FGMMOTQ>PQQ"+--	":HY  ||$=_@]?^_==)=	o >?.:*+)(,,7K*L)MRPST`PaObbklmr   c                    | j                  |j                  dd            j                         | j                  |j                  dd            j                         g d}t        fd|D              ryrt        fd|D              ryg d}t        fd	|D              ry
rHt	        t
              rj                         dk(  s%t	        t              rt        j                        ry
y)z,Determine if inspection is a pass or failurer   r   r   )r(  r)  r*  r+  criticalseriouszmajor violationc              3   &   K   | ]  }|v  
 y wr-  r9   r.  r/  results     r   r1  z@CleanKitchensProcessor._is_passing_inspection.<locals>.<genexpr>,  s     GyyF"Gr2  Fc              3   &   K   | ]  }|v  
 y wr-  r9   )r.  r/  r   s     r   r1  z@CleanKitchensProcessor._is_passing_inspection.<locals>.<genexpr>0  s     Z)i:5Zr2  )passapprovedsatisfactoryc              3   &   K   | ]  }|v  
 y wr-  r9   rB  s     r   r1  z@CleanKitchensProcessor._is_passing_inspection.<locals>.<genexpr>5  s     DyyF"Dr2  T)
r   r+   r   r4  r   r   r   r   r   r   )r   ra   r<  pass_indicatorsrC  r   s       @@r   rJ   z-CleanKitchensProcessor._is_passing_inspection   s    
y" =>DDF^^JNN<$DEKKM

 G4FGG #ZGYZZ ?DODD jS9j>N>N>PTV>V\fgqsx\y  A  F  F  GQ  R r   c                 <   t        |j                  dd            |j                  dd      |j                  dd      |j                  dd      |j                  dd      t        |j                  dd            |j                  d	d      |j                  d
d      |j                  dd      |j                  dd      t        |j                  dd            |j                  dd      |j                  dd      |j                  dd      |j                  dd      dS )z'Convert CSV row to inspection data dictr   r   dba_namer   r   r   r  zipr]   inspection_typer   r   license_facility_typeriskr   Nr   )r   r;   r   r   r   zip_coder]   rM  r   r   license_numberr  r   r   r   )r   r+   )r   rY   s     r   rD   z.CleanKitchensProcessor._row_to_inspection_data?  s     !"!=>"%''*b"9wwy"-GGFB'WWWd+CGGE2./"ww'8"="ww'8"=wwy"-'',3!#''*b"9:"%''/2">''&"-
D1d3
 	
r   c                     |j                  d      }|sy	 | j                  j                  j                  dgd|dd      }t	        |j
                        dkD  S #  Y yxY w)z"Check if inspection already existsr   FEqual)pathoperator	valueTextr=   limitr   )r+   r-   queryfetch_objectsrG   objects)r   rZ   r   r%  s       r   rE   z$CleanKitchensProcessor._is_duplicateS  sq    '++O<	++11??)*m\ @ H x''(1,,	s   AA Ac                 n   t        j                  t        j                  |d      j	                               j                         }|j                  d      t        j                  |      |t        j                         j                  d      dd}| j                  j                  j                  |      S )zSave raw inspection dataT)	sort_keysr   %Y-%m-%dT%H:%M:%S.%fZF)r   raw_data	data_hashdownload_date	processed)hashlibmd5r   r   encode	hexdigestr+   r   r   strftimer-   r   insert)r   rZ   ra  r`  s       r   rK   z+CleanKitchensProcessor._save_raw_inspectionc  s    KK

?d K R R TU__a	 -00A

?3"%\\^445LM
 ##((//99r   c                 0   |j                  d      }|r	 	 | j                  j                  j                  t	        j
                  d      j                  |      d      }t        |d      r4|j                  r(t        d| d	       |j                  d
   j                  S | j                  j                   j#                  |      }| j%                  |j                  dg              |S # t        $ r | j                  j                  j                  d      }|j                  D cg c]#  }|j                  j                  d      |k(  s"|% nc c}w }} t        dt        fd|i             }Y w xY w# t        $ r}t        d|        Y d}~d}~ww xY w)z:Save generated article to Weaviate with duplicate checkingslugr=   whererY  i  rX  objr\  u   ⚠️  Article with slug 'z$' already exists, skipping duplicater   u*   ❌ Error checking for duplicate article: Nr  )r+   r,   rZ  r[  r   by_propertyequal	TypeErrorr\  
propertiestypeobjectr5   r@   uuidrP   r   ri  _create_tag_pages)	r   re   rk  r%  existing_listrn  filteredrg   rf   s	            r   rN   z$CleanKitchensProcessor._save_articleq  s    'H	O#}}22@@$008>>tD  A  H 8Y/H4D4D7v=abc#++A.333 ]]''..|<
 	|//R@A' ! O$(MM$7$7$E$ED$E$QM/</D/DkHZHZ[aHbfjHjkkHkLtEF9y(6KLNH	O  HB1#FGGHsH   A	C' ?E4 '=E1$#EE!E1-E4 0E11E4 4	F=FFc           
         |D ]  }|rEt        |t              r|j                         r%t        |t              rt	        j
                  |      rL	 	 | j                  j                  j                  t        j                  d      j                  |      d      }t%        |d      r|j                  r|j                  d   }|j                  j                  d	d      }| j                  j&                  j)                  |j*                  |dz   t-        j.                         j1                  d
      d       n|| j3                  d|       | dd| ddt-        j.                         j1                  d
      t-        j.                         j1                  d
      d}| j                  j&                  j5                  |        y# t        $ r | j                  j                  j                  d      }|j                  D cg c]#  }|j                  j                  d      |k(  s"|% nc c}w }} t!        dt"        fd|i             }Y w xY w# t6        $ r}t9        d| d|        Y d}~nd}~ww xY w)z)Create or update tag pages for given tagstag_namer=   rl  d   rX  rn  r\  r   r   r_  )r   last_updatedru  rr  ztag-z( - Chicago Restaurant Health Inspectionsz?All Chicago restaurant health inspection articles tagged with 'z3'. See the latest violations, closures, and passes.)rz  rk  rm   descriptionr   created_dater|  u!   ❌ Error creating tag page for 'z': N)r   r   r   r   r   r   r.   rZ  r[  r   ro  rp  rq  r\  rr  r+   rs  rt  r5   r   r   ru  r   r   rh  _generate_slugri  rP   r@   )	r   tagstagr%  rn  tag_objcurrent_counttag_page_datarg   s	            r   rv  z(CleanKitchensProcessor._create_tag_pages  s>    .	C:c3/		TWY^I_dfdkdklodp*	O#~~33AA$00<BB3G  B  H 8Y/H4D4D&..q1G$+$6$6$:$:?A$NMNN''..$\\-:Q->,4LLN,C,CD[,\$ /  %( $ 3 3d3%L A$'5(P!Q)hilhm  na  (b)*(0(?(?@W(X(0(?(?@W(X%M NN''..}=U.	 ! O#~~33AAALH/7/?/?i3>>CUCUV`CaehChiiHiLtEF9y(6KLNH	O@  9#c!EFsJ   A	GD&I=I#H*%H*)!I
III	I6I11I6c                     | j                   j                  j                  |dt        j                         j                  d      |d       y)z'Update raw inspection with article linkTr_  )rc  processed_daterf   r}  N)r-   r   r   r   r   rh  )r   rc   rf   s      r   rO   z-CleanKitchensProcessor._update_raw_inspection  sD    !!((!"*,,."9"9:Q"R( 	) 	
r   c           
      J   	 |j                  d      }|j                  d      dz   }||| }ddl}|j                  dd|      }t	        j
                  |      }|j                  d      }|r{	 t        |t              r)dd	l	m
}	 |	j                  |      }
|
j                  d
      }n@t        |d      r|j                  d
      n"t        j                         j                  d
      }n#t        j                         j                  d
      }i ||}| j!                  |      }|j#                  i d| j%                  |d   |      d|dt        j                         j                  d
      d|j                  d      d|j                  d      d|d|j                  d      d|j                  d      dddddddt        j                         j                  d
      d|d|j                  dd      d|j                  dd      d|j                  dd      d|j                  dd      d | j'                  |j                  dd            i       |S #  t        j                         j                  d
      }Y yxY w# t(        $ r}t+        d!|         d}~ww xY w)"z1Parse Haiku response into structured article data{}r=   r   Nz[\x00-\x1f\x7f-\x9f]r   r]   parserr_  rh  rk  rm   r   r|  r;   r   r   r   ai_generatedTcontent_versionv2data_sourcechicago_health_deptprocessing_dater  r   r   r   r  	image_urlu$   ❌ Error parsing article response: )findrfindresubr   r   r+   r   r   dateutilr  parserh  r5   r   r   rp   r   r  _select_violation_imagerP   r@   )r   r   rZ   	start_idxend_idxjson_strr  parsedr]   r  parsed_datepublish_datecombined_datar  rg   s                  r   r   z.CleanKitchensProcessor._parse_article_response  s   ;	S)ImmC(1,Gy1H vv5r8DHZZ)F .112CDO	T!/373&,ll?&C'2';';<S'T\cdsu  ]A'?'?@W'X  GO  GS  GS  GU  G^  G^  _v  Gw  (||~667NO :9&9M..}=I MM ++F7O_M ,  7 78O P %o&9&9:N&O	
  !4!4_!E "< ++F3 ,,W5  "4 4 "8<<>#:#:;R#S Y o11,C ?..y"=  o11,C!" %o&9&9:NPR&S#$ T99/:M:Ml\^:_`% * M?T#+<<>#:#:;R#SL@  	8<=	s2   A)J ,A9I %E4J %J?J 	J"JJ"Nc                    ddl }| j                  |d      }|j                  dd|j                               }|j                  ddt	        |t
              r|j                         n
t        |            }|rC	 ddlm} t	        |t
              r|j                  |      }n|}|j                  d	      }| d| }|dd S # t        $ r}	t        d
|	        Y d}	~	!d}	~	ww xY w)z6Generate URL slug from title with optional date prefixr   Nuntitledz[^a-zA-Z0-9\s-]r   z\s+-r  z%m%d%yu'   ⚠️  Could not parse date for slug: r{  )r  r   r  r   r   r   r   r  r  r  rh  rP   r@   )
r   rm   r]   r  
safe_titlerk  r  r  date_prefixrg   s
             r   r  z%CleanKitchensProcessor._generate_slug  s     ^^E:6
vv("j.>.>.@Avvfc:dC3H4::<cRViX E+os3"(,,"?K"1K *228<%av.
 DSz  E?sCDDEs   7AB> >	CCCc                 *   ddl }|sd|j                  dd       dS | j                  |      j                         t	        fddD              rd	|j                  dd       dS t	        fd
dD              rd|j                  dd       dS t	        fddD              rd|j                  dd       dS t	        fddD              rd|j                  dd       dS t	        fddD              rd|j                  dd       dS t	        fddD              rd|j                  dd       dS t	        fddD              rd|j                  dd       dS t	        fddD              rd|j                  dd       dS t	        fdd D              rd!|j                  dd       dS t	        fd"d#D              rd$|j                  dd       dS d%v sd&v rd'|j                  dd       dS d|j                  dd       dS )(z3Select appropriate image based on violation contentr   Nz"/assets/images/violations/general_r=   r  z.jpgc              3   &   K   | ]  }|v  
 y wr-  r9   r.  wordviolations_lowers     r   r1  zACleanKitchensProcessor._select_violation_image.<locals>.<genexpr>4  s     bDt''br2  )r   tempcoldhottcsz&/assets/images/violations/temperature_c              3   &   K   | ]  }|v  
 y wr-  r9   r  s     r   r1  zACleanKitchensProcessor._select_violation_image.<locals>.<genexpr>6  s     fd))fr2  )rodentmousemicerat	droppingsz!/assets/images/violations/rodent_c              3   &   K   | ]  }|v  
 y wr-  r9   r  s     r   r1  zACleanKitchensProcessor._select_violation_image.<locals>.<genexpr>8       gd))gr2  )roach	cockroachinsectflyfliesz/assets/images/violations/pest_c              3   &   K   | ]  }|v  
 y wr-  r9   r  s     r   r1  zACleanKitchensProcessor._select_violation_image.<locals>.<genexpr>:  r  r2  )handwashsoap	sanitizerhygienez&/assets/images/violations/handwashing_c              3   &   K   | ]  }|v  
 y wr-  r9   r  s     r   r1  zACleanKitchensProcessor._select_violation_image.<locals>.<genexpr><  r  r2  )cleansanitizedirtysoilgreasez%/assets/images/violations/sanitation_c              3   &   K   | ]  }|v  
 y wr-  r9   r  s     r   r1  zACleanKitchensProcessor._select_violation_image.<locals>.<genexpr>>  s     dd))dr2  )storagestorelabeldatefifoz"/assets/images/violations/storage_c              3   &   K   | ]  }|v  
 y wr-  r9   r  s     r   r1  zACleanKitchensProcessor._select_violation_image.<locals>.<genexpr>@       bd))br2  )crosscontaminationrawcookedz./assets/images/violations/cross_contamination_c              3   &   K   | ]  }|v  
 y wr-  r9   r  s     r   r1  zACleanKitchensProcessor._select_violation_image.<locals>.<genexpr>B  r  r2  )licensepermitcertificatedocumentz(/assets/images/violations/documentation_c              3   &   K   | ]  }|v  
 y wr-  r9   r  s     r   r1  zACleanKitchensProcessor._select_violation_image.<locals>.<genexpr>D  s     id))ir2  )floorwallceilingrepairmaintainz%/assets/images/violations/structural_c              3   &   K   | ]  }|v  
 y wr-  r9   r  s     r   r1  zACleanKitchensProcessor._select_violation_image.<locals>.<genexpr>F  r  r2  )plumbwatersinkdrainleakz#/assets/images/violations/plumbing_closureclosedz"/assets/images/violations/closure_)randomrandintr   r   r4  )r   r   r  r  s      @r   r  z.CleanKitchensProcessor._select_violation_image*  s1   7q8K7LDQQ>>/:@@B b4abb;FNN1Q<O;PPTUUf6eff6v~~a7J6K4PPg6fgg4V^^Aa5H4INNg6fgg;FNN1Q<O;PPTUUg6fgg:6>>!A;N:OtTTd6cdd7q8K7LDQQb6abbCFNNSTUVDWCXX\]]g6fgg=fnnQq>Q=RRVWWi6hii:6>>!A;N:OtTTb6abb8!9L8MTRR**h:J.J7q8K7LDQQ 8q8K7LDQQr   c                    g }| j                  |j                  dd            j                         | j                  |j                  dd            j                         | j                  |j                  dd            j                         }| j                  |j                  dd            j                         }| j                  |j                  dd            j                         | j                  |j                  dd            j                         | j                  |j                  dd            j                         d	|v r|j                  d
       n+d|v r'd|v r|j                  d       n|j                  d       d|v r|j                  d       ddgdgdgdgdgdgdgdgdgdgdgdgdgd}|j	                         D ],  \  }}t        fd |D              s|j                  |       . g d!g d"d#d$gg d%g d&d'd(gg d)d*d+gd,d-gd.	}|j	                         D ],  \  }	}t        fd/|D              s|j                  |	       . d0d1gd2gd3gd4gd5gd6gd7d8gd9gd:d;gd<d=gd>gd?gd@}
|
j	                         D ]+  \  }}t        fdA|D              s|j                  |       - g dBg dCg dDg dEg dFg dGg dHg dIg dJg dKg dLg dMg dNg dOdP}|j	                         D ]-  \  }}t        fdQ|D              s|j                  |       / d	|v r|j                  dR       n/d|v rd|v r|j                  dS       nd|v r|j                  dT       dU|v r|j                  dV       g dW}t        fdX|D              r|j                  dY       |j                  dZd      }|r	 d[d\lm} |j                  t        |            }|j                  d]|j                          |j                  }|d^v r|j                  d_       nA|d`v r|j                  da       n+|dbv r|j                  dc       n|ddv r|j                  de       | j                  |j                  dfd            j                         }dg|v sdh|v r|j                  di       n3dj|v sdk|v r|j                  dl       ndm|v sdn|v r|j                  do       dpv sdpv r|j                  dq       ndrv sd2v sd4v r|j                  ds       nudtv sduv sdvv r|j                  dw       nWd#v r|j                  dx       nAdyv sdzv r|j                  d{       n'd'v r|j                  d|       n|j                  d}       t        fd~dD              r|j                  d       t        fddD              r|j                  d       t        fddD              r|j                  d       t        fddD              r|j                  d       t        fddD              r|j                  d       d0gd2gd3gd4gd5gd6gd7gd9gd>gd?gd
}|j	                         D ]+  \  }}t        fd|D              s|j                  |       - |j                  d       t        t        |D 	cg c])  }	|	st        |	t              s|	j                         s(|	+ c}	            dd }|S #  Y xY wc c}	w )z8Auto-generate reader-friendly, relevant tags for articler;   r   r   r   r   r  r   rm   r(  zFailed InspectionrE  	conditionzConditional PasszPassed Inspectionr   r  r   r   r   r   zriver northz
gold coastzold townbucktownzlogan squarewrigleyville	chinatownzlittle italypilsenzukrainian village)r   zlincoln-parkzwicker-parkzriver-northz
gold-coastzold-townr  zlogan-squarer  r  zlittle-italyr  zukrainian-villagec              3   2   K   | ]  }|v xs |v   y wr-  r9   )r.  keywordr   r;   s     r   r1  z;CleanKitchensProcessor._auto_tag_article.<locals>.<genexpr>~  s$     _7700FGw4FF_   )r  dinercafebistrogrill)	mcdonaldsburger kingkfc	taco bellsubwaywendys	pizza hutdominospizzapizzeria)coffee	starbucksdunkin)barpubtavernloungebakerypastry)grocerymarketr  cateringcatertruckmobile)	r  z	fast-foodr  zcoffee-shopr  r  r
  r  z
food-truckc              3   2   K   | ]  }|v xs |v   y wr-  r9   )r.  r  r;   r  s     r   r1  z;CleanKitchensProcessor._auto_tag_article.<locals>.<genexpr>  s&     jV]700QG?Q4QQjr  mcdonaldr  r  r  r  r  r  r  zkentucky friedr  r  zwendy'sr  zdomino'schipotlepanera)r  r  r  r  z	taco-bellz	pizza-hutr  zburger-kingr  r  r  r  c              3   &   K   | ]  }|v  
 y wr-  r9   r.  r  r;   s     r   r1  z;CleanKitchensProcessor._auto_tag_article.<locals>.<genexpr>       IW700Ir2  )r   r  r  r  
refrigeratfreezerz	tcs foods)r  sanitaryr  r  r  debris)r  r  r  r  r  )r  r  r  pest)zcross contamination
contaminatseparater  zready-to-eat)z	hand washhandwashr  towelr  )moldmildewfungus)r  r  piper  sewagezwaste water)light	illuminatbulbfixture)r  r  r  r  r  	construct)	equipmentmachine	appliancer  broken)garbagetrashwastedumpsterrefuse)r  r  shelf	container)employeeworkerstaffhealthillness)ztemperature-controlcleanlinessr  r  zcross-contaminationhandwashingr   plumbinglighting
structuralr*  r.  zfood-storagezemployee-healthc              3   >   K   | ]  }|v xs
 |v xs |v   y wr-  r9   )r.  r  r   rm   r   s     r   r1  z;CleanKitchensProcessor._auto_tag_article.<locals>.<genexpr>  s.     kW^7j(RGw,>R'UBRRks   zfailed-inspectionzconditional-passzpassed-inspectionr*  r  )r?  priority	immediatecitationr@  c              3   2   K   | ]  }|v xs |v   y wr-  r9   )r.  r  r   r   s     r   r1  z;CleanKitchensProcessor._auto_tag_article.<locals>.<genexpr>  s#     \ww*$:7(::\r  zcritical-violationr]   r   r  zyear-)   r=   r   winter)r     r>   spring)         summer)	   r     fallr   highzrisk 1z	high-riskmediumzrisk 2zmedium-risklowzrisk 3zlow-riskr  
Restaurantr  zCoffee Shopr  r  r  BarPizzar
  r  zGrocery StoreBakeryzFood Servicec              3   &   K   | ]  }|v  
 y wr-  r9   r.  r  r   s     r   r1  z;CleanKitchensProcessor._auto_tag_article.<locals>.<genexpr>       Mdtz!Mr2  )r   r  r  zTemperature Violationsc              3   &   K   | ]  }|v  
 y wr-  r9   rW  s     r   r1  z;CleanKitchensProcessor._auto_tag_article.<locals>.<genexpr>   s     Hdtz!Hr2  )r  r  r  zRodent Problemc              3   &   K   | ]  }|v  
 y wr-  r9   rW  s     r   r1  z;CleanKitchensProcessor._auto_tag_article.<locals>.<genexpr>  s     Edtz!Er2  )r  r  zPest Problemc              3   &   K   | ]  }|v  
 y wr-  r9   rW  s     r   r1  z;CleanKitchensProcessor._auto_tag_article.<locals>.<genexpr>  rX  r2  )r  r  r  zCleanliness Issuesc              3   &   K   | ]  }|v  
 y wr-  r9   rW  s     r   r1  z;CleanKitchensProcessor._auto_tag_article.<locals>.<genexpr>  s     Gdtz!Gr2  )r  r  r  zHandwashing Issues)
z
McDonald's	StarbucksSubwayzDunkin'z	Taco Bellz	Pizza HutKFCzBurger KingChipotlePanerac              3   &   K   | ]  }|v  
 y wr-  r9   r  s     r   r1  z;CleanKitchensProcessor._auto_tag_article.<locals>.<genexpr>  r  r2  NrJ  )r   r+   r   rF   rI   r4  r  r  r  r   yearmonthr  r  r   r   )r   re   r  r   r   neighborhood_keywordsr   keywordsestablishment_type_mappingr  chain_keywordschainviolation_keywordsviolation_tagcritical_keywordsr]   r  date_objrd  r   major_chains
chain_namer   r;   r  rm   r   s                         @@@@@r   rp   z(CleanKitchensProcessor._auto_tag_articleR  sa   	 "^^L,<,<=QSU,VW]]_^^L$4$4\2$FGMMO
..!1!1)R!@AGGI~~l..vr:;AAC!^^L,<,<=QSU,VW]]_..!1!1)R!@AGGI|//<=CCE W01wg%  !34  !45 Y' Z(+,)?)?'.##+,+,%+,j"5!6!
  '<&A&A&C 	/"L(_V^__  .	/ Muz*<5*5#W-"H-
&
" 8==? 	&MCjaijj  %	& %k2%jj%%+,)?+!:.#j
  .335 	(OE8III  '	( $pSE=#kM0SAYRJFS
" (:'?'?'A 	0#M8kbjkk  /	0
 W01w;'#9/0w01'Y' Y\J[\\12 '**+<bA+!<<O(<=   5!89 !J&$$X.i'$$X.i'$$X.k)$$V, ^^L$4$4\2$FGMMO
Z8z#9[)#x:'=]+j H
$:Z( --AS1S\*++{>P/PT\`rTr]+((H8J,JeWiNiU#**W%,,<N0N_-++X&^, M.LMM56H.GHH-.E.DEE^,M.LMM12G.FGG12 &,%j z%%7)?#j
 %1$6$6$8 	- JIII  ,	-
 	# Ygc#*SRUBV[^[d[d[fcghijlklm	Az hs%   B![9 ;\\\%\9[>c                      y)z*Check if new article triggers any patternsNr9   )r   re   s     r   rq   z.CleanKitchensProcessor._check_pattern_triggers#  s     	r   c                     g S )z$Detect patterns in existing articlesr9   r2   s    r   rz   z'CleanKitchensProcessor._detect_patterns(  r   r   c                     g S )z9Download latest violations from Chicago Health Departmentr9   r2   s    r   ro   z2CleanKitchensProcessor._download_latest_violations1  r   r   c                      y)zGet coordinates for addressNr9   )r   r   s     r   r   z'CleanKitchensProcessor._geocode_address6  s     r   c                      y)z/Determine neighborhood from coordinates/addressDowntownr9   r   rZ   s     r   r   z.CleanKitchensProcessor._determine_neighborhood;  s     r   c                      y)z(Determine district/ward from coordinatesz
District 1r9   rv  s     r   r   z*CleanKitchensProcessor._determine_district@  s    r   c                 \   g }|j                  d      r&|j                  ddj                  |d                 |j                  d      r&|j                  ddj                  |d                 |j                  d      r|j                  d|d           |rdj                  |      S d	S )
zFormat local context for promptr   zNear landmarks: r   r   zTransit access: r   zArea: r  zNo local context available)r+   rF   r   )r   r   contexts      r   r   z,CleanKitchensProcessor._format_local_contextD  s    88&'NN-dii=O8P.Q-RST88$%NN-dii=M8N.O-PQR88'(NNVD)<$=#>?@%,tyy!N2NNr   c                     g }|dd D ]M  }|j                  d|j                  dd       d|j                  dd       d|j                  d	d       d
       O dj                  |      S )z(Format articles for pattern story promptNr>   r  rm   r<   z: r;   r  r]   r  r  )rF   r+   r   )r   r,   	formattedarticles       r   r   z2CleanKitchensProcessor._format_articles_for_promptS  s    	| 	cGr'++gy"A!B"W[[QegpEqDrrtu|  vA  vA  BS  U^  v_  u`  `a  b  c	cyy##r   c                 L    | j                   j                  j                  |      S )zSave pattern story to Weaviate)r/   r   ri  )r   r   s     r   r|   z*CleanKitchensProcessor._save_pattern_storyZ  s    ##((//
;;r   )r   )r{  r-  )+__name__
__module____qualname__r   r3   r7   r0   rk   rt   r   rM   r   r   r{   rL   r   r   r   r   r   rQ   rH   r   rJ   rD   rE   rK   rN   rv  rO   r   r  r  rp   rq   rz   ro   r   r   r   r   r   r|   r9   r   r   r   r      s    EG$)&UXe5V,d:B!VFJXRp4A($
	$|D4n<>
( : D0d	
=~6"RPOb



O$<r   r   c                     t               } t        d       t        d       t        d       t        d       t        d       t        d       t        d      }|dk(  r6t        d	      }t        t        d
      xs d      }| j	                  ||       y |dk(  r| j                          y |dk(  r| j                          y |dk(  r!| j                          | j                          y t        d       y )Nz%CleanKitchens Comprehensive Processorz%=====================================z1. Bulk Upload Processingz2. Daily Violation Checkz3. Pattern Story Generationz$4. All Processing (Daily + Patterns)z
Select processing mode (1-4): 1zEnter path to CSV file: z Enter batch size (default 100): r{  234zInvalid choice)r   r@   inputr   rk   rt   r   )	processorchoicecsv_filerU   s       r   mainr  b  s    &(I	
12	
12	
%&	
$%	
'(	
0156F}34ABIcJ
%%h
;	3**,	3**,	3**,**, 	r   __main__)__doc__r$   sysr   rR   rd  requestsr   r   pathlibr   r!   weaviate.classes.queryr   	anthropicr   rA   r   dotenvr   violation_codes_lookupr	   r   r  r~  r9   r   r   <module>r     si    
 
     (   )      4< <J* < zF r   