
    hni                         d Z ddlZddlZddlZddlZddlZddlZddlmZ ej                  j                  dd        G d d      Z
d Zedk(  r e        yy)	a%  
Test Article Generator for CleanKitchens - Weaviate Version
Processes 2-3 inspection records through complete pipeline:
1. Get inspection data from Weaviate
2. Send to Claude for article generation
3. Select appropriate image
4. Save complete article back to Weaviate
5. Display on live site
    N)datetimezO/var/www/twin-digital-media/public_html/_sites/cleankitchens/production/scriptsc                   \    e Zd Zd ZddZd Zd Zd Zd Zd Z	d Z
d	 Zd
 Zd Zd Zd Zy)TestArticleGeneratorc                     d| _         d| _        d| _        t        j                  dd      | _        ddd	d
ddddddd| _        dddg ddddg ddddg dddi| _        y )Nzl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claude-3-5-sonnet-20241022z%https://api.anthropic.com/v1/messages	localhosti  )hostport)zUSDA Danger Zonezphttps://www.fsis.usda.gov/food-safety/safe-food-handling-and-preparation/food-safety-basics/danger-zone-40f-140f)zFDA Food Code 3-501.16z5https://www.fda.gov/food/fda-food-code/food-code-2022)zFDA Cold Storage RequirementszDhttps://www.fda.gov/food/buy-store-serve-safe-food/safe-food-storage)danger_zonehot_holdingcold_holding)zCDC Handwashing Guidelinesz9https://www.cdc.gov/handwashing/when-how-handwashing.html)z"FDA Preventing Cross-ContaminationzQhttps://www.fda.gov/food/buy-store-serve-safe-food/preventing-cross-contamination)handwashingcontamination)zCDC Rodent Disease Informationz/https://www.cdc.gov/rodents/diseases/index.html)zCDC Disease Transmissionz0https://www.cdc.gov/rodents/diseases/direct.html)rodentsdiseases)temperaturehygienepestsneighborhoodszfar South Side neighborhoodzOCTA Red Line at 95th Street and Electric District line at 103rd Street/Roseland)zFernwood ParkzRoseland Community HospitalzMichigan Avenue corridordescriptiontransit	landmarkszLoop business districtz'multiple CTA lines converge in the Loop)zMillennium Parkz	City HallzChicago Riverz!North Side lakefront neighborhoodzBrown and Red Lines)zLincoln Park ZoozDePaul UniversityzNorth Avenue Beach)ROSELANDDOWNTOWNzLINCOLN PARK)claude_api_keyclaude_modelclaude_api_urlweaviateconnect_to_localweaviate_clienteducational_referenceschicago_context)selfs    i/var/www/twin-digital-media/public_html/_sites/cleankitchens/production/scripts/test_article_generator.py__init__zTestArticleGenerator.__init__   s     M8E  (88kPTU
  hr !J  { "]
 ql'
#$ #@p!m $<H!R $G4!`! 
    c                 *   	 | j                   j                  j                  d      }|j                  j	                  |g d      }g }|j
                  D ]  }|j                  j                  dd      }| j                  |d      }| j                  |      }| j                  |d      }	| j                  |d      }
|j                  j                  d	      |j                  j                  d
      |j                  j                  d      |j                  j                  d      |j                  j                  d      |||	|
|t        |j                        d}|j                  |        |S # t        $ r}t        d|        g cY d}~S d}~ww xY w)z$Get inspection records from WeaviateRawInspection)inspection_iddba_namecityresultsraw_datainspection_date
source_apistatus)limitreturn_propertiesr-    AddresszFacility TypeZipr)   r*   r+   r,   r.   )r)   facility_namer+   r,   r.   address
violationsfacility_typezip_coder-   uuidzError getting inspection data: N)r    collectionsgetqueryfetch_objectsobjects
propertiesextract_field_from_rawextract_violations_from_rawstrr;   append	Exceptionprint)r#   r1   
collectionresponseinspectionsobjr-   r7   r8   r9   r:   
inspectiones                r$   get_inspection_dataz(TestArticleGenerator.get_inspection_dataG   s}   *	--99==oNJ "''55# 6 H K'' />>--j"= 55h	J!==hG
 $ ; ;Ho V66xG &)^^%7%7%H%(^^%7%7
%CNN..v6"~~11)<'*~~'9'9:K'L&",%2 ( (M
 "":.//2  	3A378I	s   E,E/ /	F8FFFc                     	 ddl }| d}|j                  ||      }|r|j                  d      j                         S 	 y#  Y yxY w)z+Extract specific field from raw_data stringr   Nz:([^,]+)   r3   )researchgroupstrip)r#   r-   
field_namerQ   patternmatchs         r$   rB   z+TestArticleGenerator.extract_field_from_rawu   sW    	#H-GIIgx0E{{1~++--  	s   ;A   Ac                 h    	 d|v r%|j                  dd      d   }|j                         S 	 y#  Y yxY w)z Extract violations from raw_datazViolations:rP   r3   )splitrT   )r#   r-   violations_parts      r$   rC   z0TestArticleGenerator.extract_violations_from_raw   sJ    	("*.."B1"E&,,.. ) 	s   (- 1c                     | j                  |d      }dddddddd	d
ddddddd}|j                  ||d         }||d   |d   d|j                         v r|d   dS |d   dS )z9Parse inspection result and get city-specific explanationResultsz	Pass/FailzkChicago uses a Pass/Fail system where restaurants either pass inspection or fail due to critical violationszJindicates critical food safety violations that pose immediate health riskszJindicates the restaurant met basic food safety standards during inspection)typer   fail_meaningpass_meaningzLetter Gradez<NYC uses A/B/C letter grades based on violation point totalsz>typically corresponds to a C grade with significant violationsz<typically corresponds to an A or B grade with few violationsz1Boston uses a Pass/Fail system similar to Chicagoz;indicates critical violations requiring immediate attentionz2indicates compliance with health code requirements)CHICAGONYCBOSTONr`   r]   r   failr^   r_   )resultsystem_typesystem_descriptionresult_meaning)rB   r=   lower)r#   r-   r+   rd   city_systems	city_infos         r$   parse_inspection_resultz,TestArticleGenerator.parse_inspection_result   s     ,,XyA
 $  M l l	 '] ` ^	 $R ] T	
* !$$T<	+BC	 $V,"+M":;AV\\^;Si7	
 	
 ZccqYr	
 	
r&   c                     d|v rd|v r| j                   d   d   S d|j                         v sd|j                         v r| j                   d   d   S d|j                         v r| j                   d   d	   S d
ddgdS )z+Get local context for Chicago neighborhoods10360628r   r   clarkloopr   lincolnLINCOLN_PARKzChicago neighborhoodzCTA bus and rail linesz!local parks and community centersr   )r"   rh   )r#   r7   r:   s      r$   get_neighborhood_contextz-TestArticleGenerator.get_neighborhood_context   s     G8 3''8DD'6W]]_+D''8DD'--/)''8HH  63AB r&   c                    |j                         g }t        fddD              r!|j                  | j                  d   d          t        fddD              r!|j                  | j                  d   d          t        fd	d
D              r!|j                  | j                  d   d          t        fddD              r!|j                  | j                  d   d          |dd S )z8Determine which educational references are most relevantc              3   &   K   | ]  }|v  
 y wN .0termviolations_lowers     r$   	<genexpr>z?TestArticleGenerator.get_relevant_references.<locals>.<genexpr>   s     ^Dt''^   )r   coldhotholdingr   r
   c              3   &   K   | ]  }|v  
 y wrv   rw   rx   s     r$   r|   z?TestArticleGenerator.get_relevant_references.<locals>.<genexpr>   s     YDt''Yr}   )handwashglover   r   r   c              3   &   K   | ]  }|v  
 y wrv   rw   rx   s     r$   r|   z?TestArticleGenerator.get_relevant_references.<locals>.<genexpr>   s     `Dt''`r}   )rodentmouseratroachpestr   r   c              3   &   K   | ]  }|v  
 y wrv   rw   rx   s     r$   r|   z?TestArticleGenerator.get_relevant_references.<locals>.<genexpr>   s     ODt''Or}   )r   crossr   N   )rh   anyrE   r!   )r#   violations_textrelevant_refsr{   s      @r$   get_relevant_referencesz,TestArticleGenerator.get_relevant_references   s    *002 ^4]^^  !<!<]!KM!Z[ Y4XYY  !<!<Y!G!VW `4_``  !<!<W!Ei!PQ O4NOO  !<!<Y!G!XYRa  r&   c                    | j                  |d   |d         }	 t        |d   t              rt        j                  |d   d      }n|d   }|j                  d      }| j                  |j                  dd            }| j                  |d   |d	         }d
}|dz  }|dz  }|dz  }|dz  }|dz  }|dz  }|dz  }|dz  }|dz  }|dz  }|dz  }|dz  }|dz  }|dz  }|dz  }|dz  }|dz  }|dz  }|dz  }|dz  }|dz  }|d z  }|d!z  }|d"z  }|d#z  }|d$z  }|d%z  }|d&z  }|d'z  }|d(z  }|d)z  }|d*z  }|d+z  }|d,z  }|d-z  }|r#|d.z  }|D ]  }|d/|d0    d1|d2    d3z  } |d3z  }|d4|d	    d5z  }|d6|d7    d3z  }|d8|d9    d3z  }|d:d;j                  |d<          d=z  }|d>z  }|d?|d@    d3z  }|dA|d    dBz  }|dC| d3z  }|dD|dE    d3z  }|j                  dF      r|dG|dF    d3z  }|dHz  }|dI|dJ    d3z  }|dK|dL    d3z  }|dM|dN    d3z  }|dO|d    d=z  }|dPz  }|dQz  }|dRz  }|dSz  }|dTz  }|dUz  }|dVz  }|dWz  }|dUz  }|S #  |d   }Y xY w)Xz0Build Claude prompt with your educational formatr7   r:   r.   z%m/%d/%Yz	%B %d, %Yr8   r3   r-   r+   zpYou are a food safety educator and journalist creating educational content about restaurant health violations.

z_Create an article that TEACHES while it INFORMS about food safety using real inspection data.

zvCRITICAL: You must follow the EXACT format specified below. Each section must be wrapped with the specified markers.

zYour article should:
zJ1. Have a compelling, specific headline that includes the restaurant name
zJ2. Report the violations factually and objectively with neutral news tone
z93. Focus on what readers can learn from these violations
z"4. Be between 600-900 words total
z<5. Include local context (neighborhood, transit, landmarks)
zJ6. Use neutral news reporter tone - NO hyperbole, NO fictional characters
z47. Include specific details about violations found

zIMPORTANT FORMATTING:
zB- Start content with <h2> (NOT <h1>) as this goes into a template
zM- Use proper heading hierarchy: <h2> for main sections, <h3> for subsections
z- Use <p> tags for paragraphs
z8- This is article CONTENT ONLY, not a complete web page
zD- Do NOT include <html>, <head>, <body> or any page structure tags

z5REQUIRED: Add 'The Food Safety Lesson' section with:
z?- 2-3 sentences explaining the food safety principles involved
zZ- MUST include inline hyperlinks to government sources using HTML: <a href="URL">text</a>
zI- Every educational statement should link to its source (FDA, CDC, USDA)
z)- One practical tip for home food safety
z7- Format section with <h3>The Food Safety Lesson</h3>

zgREQUIRED: Add 'Frequently Asked Questions' section with EXACTLY these 4 questions in this exact order:
z7QUESTION 1: 'When was this restaurant last inspected?'
z.QUESTION 2: 'What was the inspection result?'
z*QUESTION 3: 'What does this rating mean?'
z*QUESTION 4: 'What violations were found?'
zE- Use the inspection data provided to answer each question factually
z=- For Question 3, use the rating system information provided
z6- Include hyperlinks to government sources in answers
z1- Format as: <h3>Frequently Asked Questions</h3>
zS<p><strong>Q: When was this restaurant last inspected?</strong><br>A: [Answer]</p>
zJ<p><strong>Q: What was the inspection result?</strong><br>A: [Answer]</p>
zF<p><strong>Q: What does this rating mean?</strong><br>A: [Answer]</p>
zG<p><strong>Q: What violations were found?</strong><br>A: [Answer]</p>

z0Use these government references where relevant:
z- r   z: rP   
zLOCAL CONTEXT for z:
z- Neighborhood: r   z- Transit: r   z- Landmarks: z, r   z

zRESTAURANT DETAILS:
zRestaurant Name: r6   	Address: z, Chicago, IL
zInspection Date: zInspection Result: r,   r9   zFacility Type: z
RATING SYSTEM INFORMATION:
zSystem Type: re   zSystem Description: rf   zResult Meaning: rg   z
VIOLATIONS FOUND:
zFOUTPUT FORMAT - You must structure your response EXACTLY as follows:

z===META_DATA===
z=title: [55-60 char title - restaurant name + violation type]
zJdescription: [150-160 char description mentioning location and violation]
z===META_DATA===

z===CONTENT===
zF[Your complete article HTML with Food Safety Lesson and FAQ sections]
zC[Use HTML tags: <h2>, <h3>, <p>, <strong>, <a href="URL">text</a>]
)
rs   
isinstancerD   r   strptimestrftimer   r=   rk   join)	r#   rL   neighborhooddate_objr.   r   rating_infopromptrefs	            r$   build_claude_promptz(TestArticleGenerator.build_claude_prompt   sv    44Z	5JJWaLbc	<*%67=#,,Z8I-JJW%&78&//<O
 44Z^^LRT5UV 22:j3I:V\K]^ Fuu  M  	M**____NN77QQ__JJ++WWbb33MMZZJJTTqq^^>>MM||LLCC????ZZRRKKFFhh__[[]] IIF$ 4Bs1vhbQ334dNF 	&z&'9&:#>>$\-%@$ADDKY 78;;M$))L,E"F!GtLL 	))%j&A%B"EEIj34ODD%o%6b99'
9(='>bAA>>/*
?(C'DBGGF 	23M+m"<!=R@@(5I)J(K2NN$[1A%B$C2FF)*\*B)C4HH 	\\%%RR__''##[[ZZ##	<():;Os   AI I(c                 x   | j                   ddd}| j                  dd|dgdd}	 t        j                  | j                  ||d	
      }|j
                  dk(  r|j                         S t        d|j
                   d|j                          y# t        $ r!}t        dt        |              Y d}~yd}~ww xY w)zCall Claude APIz
2023-06-01zapplication/json)z	x-api-keyzanthropic-versionzcontent-typei  user)rolecontentgffffff?)model
max_tokensmessagesr   <   )headersjsontimeout   zClaude API error: z - NzClaude API exception: )r   r   requestspostr   status_coder   rG   textrF   rD   )r#   r   r   datarI   rM   s         r$   call_claude_apiz$TestArticleGenerator.call_claude_apiG  s     ,,!-.
 && #% 

	}}##	H ##s*}}&*8+?+?*@HMM?ST 	*3q6(34	s   AB )%B 	B9B44B9c                    	 |d   d   d   }ddl }|j                  d||j                        }|j                  d||j                        }i }|rp|j                  d      j	                         }|j                  d      D ]=  }d	|v s|j                  d	d      \  }	}
|
j	                         ||	j	                         <   ? |r|j                  d      j	                         nd
}||fS # t        $ r}t        d|        i d
fcY d}~S d}~ww xY w)z;Extract META_DATA and CONTENT sections from Claude responser   r   r   Nz*===META_DATA===\s*\n(.*?)\n===META_DATA===z&===CONTENT===\s*\n(.*?)\n===CONTENT===rP   r   :r3   zError extracting sections: )rQ   rR   DOTALLrS   rT   rY   rF   rG   )r#   claude_responsetext_contentrQ   
meta_matchcontent_match	meta_data	meta_textlinekeyvaluer   rM   s                r$   extract_sectionsz%TestArticleGenerator.extract_sectionsm  s   	*95a8@L #PR^`b`i`ijJII&OQ]_a_h_hiMI&,,Q/557	%OOD1 ?Dd{%)ZZQ%7
U16	#))+.?
 9Fm))!,2242Gg%% 	/s34r6M	s%   BC& AC& &	D/D DDc                     	 ddl m}  |       }|d   |d   |d   j                         dv dd}|j                  |      S # t        $ r}t        d	|        d
dddcY d}~S d}~ww xY w)z-Select appropriate image using image selectorr   )ImageSelectorr6   r8   r,   )rc   closed	violation)r6   r8   
is_closurearticle_typezError selecting image: z'/assets/images/violations/general_1.jpgz&Restaurant health inspection violationgeneral)	image_url	image_altcategoryN)image_selectorr   rh   select_image_for_articlerF   rG   )r#   rL   r   selector
image_datarM   s         r$   select_imagez!TestArticleGenerator.select_image  s    	4$H ",O!<(6(399;?QQ +	J 44Z@@ 	+A3/0FE% 	s   =A   	A'	A"A'"A'c                 H   	 	 | j                   j                  j                  d      }|j                  j	                  dgd|d   dd      }|j
                  r*t        d|d           |j
                  d   j                  S 	 dd	l}|j)                  dd|j                  d
d      j+                               j-                  d      }	t/        |	      dkD  r|	d	d j1                  dd      d   }	dd	l}|j)                  dd|      }
dj3                  |
j5                         d	d        d!z   }|j                  d
d      |	|||j                  d"d      |d   d#|d$   t7        j8                         j;                         |d   |d   g d%|d   d&d'd(}|j<                  j?                  |      }t        d)|        |S #  dd	lm	} | j                   j                  j                  d|j                  j                  j                  j                         |j                  j                  d
|j                  j                   j"                        |j                  j                  d|j                  j                   j"                        |j                  j                  d|j                  j                   j"                        |j                  j                  d|j                  j                   j"                        |j                  j                  d|j                  j                   j"                        |j                  j                  d|j                  j                   j"                        |j                  j                  d|j                  j                   j"                        |j                  j                  d|j                  j                   j"                        |j                  j                  d|j                  j                   j"                        |j                  j                  d|j                  j                   j"                        |j                  j                  d|j                  j                   j"                        |j                  j                  d|j                  j                   j$                        |j                  j                  d|j                  j                   j"                        |j                  j                  d|j                  j                   j"                        |j                  j                  d|j                  j                   j"                        g      }Y xY w# t@        $ r}t        d*|        Y d	}~y	d	}~ww xY w)+z"Save processed article to WeaviateArticlesr)   Equal)pathoperator	valueTextrP   )wherer1   u-   ⚠️ Article already exists for inspection r   Ntitle)name	data_typeslugr   excerptmeta_descriptionr+   stateestablishment_namepublished_dater   r   tagsr   r0   )vectorizer_configrA   z
[^a-z0-9]+-r3   d   z<[^>]+>    z...r   ILr6   )ChicagozHealth InspectionzFood Safetyr   	published)r   r   r   r   r   r+   r   r   r   r   r   r   r)   r   r0   u   ✓ Saved article to Weaviate: zError saving to Weaviate: )!r    r<   r=   r>   r?   r@   rG   rA   weaviate.classesclassescreateconfig	Configure
Vectorizertext2vec_transformersPropertyDataTypeTEXT
TEXT_ARRAYrQ   subrh   rT   lenrsplitr   rY   r   now	isoformatr   insertrF   )r#   rL   r   r   r   rH   existingwvcrQ   r   	text_onlyr   article_datard   rM   s                  r$   save_article_to_weaviatez-TestArticleGenerator.save_article_to_weaviate  s   Q	'!11==AA*M
 &++99!0 1$+%/%@
  :  ##I*UdJeIfgh#++A.999 $: 66-immGR.H.N.N.PQWWX[\D4y3DSz((a03 z2w7Ihhy0"56>G #w3""$-MM-$D"6*&0&A"*,,.":":"<'4'4G!+O!< +%L&  __++L9F3F8<=Mw.!11==DD&)jj&:&:&E&E&[&[&]

++CJJDWDWD\D\+]

++3::CVCVC[C[+\

++cjjFYFYF^F^+_

++cjjFYFYF^F^+_

++1CszzObObOgOg+h

++3::CVCVC[C[+\

++CJJDWDWD\D\+]

++1EQTQ[Q[QdQdQiQi+j

++1ASZZM`M`MeMe+f

++

H[H[H`H`+a

++

H[H[H`H`+a

++3::CVCVCaCa+b

++CJJL_L_LdLd+e

++3::K^K^KcKc+d

++SZZEXEXE]E]+^  E 
t  	.qc23	s,   A?F8 D4V  8OU=:V   	V!	VV!c           	         t        d       t        d       | j                  d      }|st        d       yt        dt        |       d       t        |d	      D ]  \  }}t        d
| dt        |       d       t        d|d           t        d|d           t        d|d           	 t        d       | j	                  |      }t        d       | j                  |      }|st        d       t        d       | j                  |      \  }}|st        d       t        d|j                  dd              t        dt        |       d       t        d       | j                  |      }t        d|d           t        d        | j                  ||||      }	|	ret        d!       t        d"|j                  d              t        d#|j                  dd$      j                         j                  d%d&              nt        d'       t                |t        |      k  r t        d(       t        j                  d)        t        d+       t        d,       y# t        $ r}
t        d*|
        Y d}
~
,d}
~
ww xY w)-zMain test processingz=== Test Article Generator ===z<Processing 2-3 inspection records through complete pipeline
   )r1   zNo inspection data found!NzFound z inspection records to process
rP   z=== Processing Article /z ===zRestaurant: r6   zResult: r,   r   r7   zBuilding Claude prompt...zCalling Claude API...zFailed to get Claude responsezExtracting article sections...zFailed to extract contentu   ✓ Article generated: r   Unknownz  Content length: z characterszSelecting appropriate image...u   ✓ Selected image: r   zSaving to Weaviate...u   ✓ Article saved successfully!z	  Title: z  URL will be: /r3   r   r   zFailed to save articlez)Waiting 3 seconds before next article...
r   zError processing article: z=== Test Complete ===z-Articles should now appear on your live site!)rG   rN   r   	enumerater   r   r   r=   r   r   rh   replacetimesleeprF   )r#   rJ   irL   r   r   r   r   r   rd   rM   s              r$   process_test_articlesz*TestArticleGenerator.process_test_articles  s   ./MN ..Q.7-.s;'((HIJ&{A6 7	MAz+A3aK0@/AFGLO!< =>?HZ	2345Ij345611211*= -."&"6"6v">&9: 67%)%:%:?%K"	756/	gy0Q/RST*3w<.DE 67!..z:
,Z-D,EFG -.66z9gWab;=IimmG&<%=>?,Y]]7B-G-M-M-O-W-WX[]`-a,bcd23 s;''FGJJqMg7	r 	%&=>  21#67s&   1AI*7,I*$D,I**	J3JJN)r   )__name__
__module____qualname__r%   rN   rB   rC   rk   rs   r   r   r   r   r   r   r  rw   r&   r$   r   r      sL    -
^,\
"
H"!.kZ$L40SjH?r&   r   c                      t               } 	 | j                          | j                  j                          y # | j                  j                          w xY wrv   )r   r  r    close)	generators    r$   mainr  >  s@    $&I*'')!!'')	!!'')s	   7 A__main__)__doc__ossysr   r   r   r  r   r   r   r   r  r  rw   r&   r$   <module>r     s\    
 
      d ee? e?N* zF r&   