@echo off
REM Console setup: command echo is off, colour attribute 0A, custom window title.
color 0A
title SF Inspection Collector - Server Upload Version

REM Startup banner followed by one blank line.
echo ============================================
echo   SF INSPECTION COLLECTOR
echo   Server Upload Version
echo ============================================
echo(

REM Set up paths
REM The interpreter path used to be tied to one user profile. It is now derived
REM from LOCALAPPDATA, which INSTALL_DIR already uses, so the default per-user
REM Python 3.13 install is found for whoever runs this script.
set "PYTHON_EXE=%LOCALAPPDATA%\Programs\Python\Python313\python.exe"
set "INSTALL_DIR=%LOCALAPPDATA%\SFCollector"
set "SCRIPT_FILE=%INSTALL_DIR%\sf_collector_upload.py"

REM Fallback: when that install is missing, take the first python.exe that the
REM where command reports from PATH. If nothing is found PYTHON_EXE keeps the
REM default value and the existence check that follows reports the problem.
set "PYTHON_ON_PATH="
if not exist "%PYTHON_EXE%" (
    for /f "delims=" %%P in ('where python.exe 2^>nul') do (
        if not defined PYTHON_ON_PATH set "PYTHON_ON_PATH=%%P"
    )
)
if defined PYTHON_ON_PATH set "PYTHON_EXE=%PYTHON_ON_PATH%"

REM Check Python
REM Guard-clause form: jump past the failure path when the interpreter exists.
echo Checking Python...
if exist "%PYTHON_EXE%" goto :python_found
echo ERROR: Python not found
pause
exit /b 1

:python_found
REM Show which interpreter version will be used, then a blank line.
"%PYTHON_EXE%" --version
echo(

REM Create directory
REM Every later step writes into INSTALL_DIR, so stop here when it cannot be
REM created instead of letting each following redirect fail one by one.
REM The trailing backslash makes the test match a directory only.
echo Creating installation directory...
if not exist "%INSTALL_DIR%\" mkdir "%INSTALL_DIR%"
if not exist "%INSTALL_DIR%\" (
    echo ERROR: Could not create "%INSTALL_DIR%"
    pause
    exit /b 1
)

REM Write Python script that downloads PDFs and uploads to server
REM The script is emitted one line per echo. The first echo uses a truncating
REM redirect, so an existing script file is overwritten; every later echo
REM appends. Lines of the form echo # write a bare Python comment and serve as
REM separators between sections of the generated file.
echo Creating collector script with server upload...
REM Standard-library imports, warning suppression and the two WDM_ environment
REM settings, followed by the banner the generated script prints on start.
echo import os > "%SCRIPT_FILE%"
echo import sys >> "%SCRIPT_FILE%"
echo import time >> "%SCRIPT_FILE%"
echo import json >> "%SCRIPT_FILE%"
echo import base64 >> "%SCRIPT_FILE%"
echo import warnings >> "%SCRIPT_FILE%"
echo from datetime import datetime >> "%SCRIPT_FILE%"
echo warnings.filterwarnings('ignore') >> "%SCRIPT_FILE%"
echo os.environ['WDM_LOG'] = '0' >> "%SCRIPT_FILE%"
echo os.environ['WDM_PROGRESS'] = '0' >> "%SCRIPT_FILE%"
echo # >> "%SCRIPT_FILE%"
echo print('SF INSPECTION COLLECTOR - SERVER UPLOAD VERSION') >> "%SCRIPT_FILE%"
echo print('='*50) >> "%SCRIPT_FILE%"
echo print() >> "%SCRIPT_FILE%"
echo # >> "%SCRIPT_FILE%"
REM Third-party imports of the generated script. On ImportError it runs pip
REM for selenium, requests and webdriver-manager with the same interpreter
REM and then repeats the imports. The pip result is not checked there.
echo try: >> "%SCRIPT_FILE%"
echo     from selenium import webdriver >> "%SCRIPT_FILE%"
echo     from selenium.webdriver.common.by import By >> "%SCRIPT_FILE%"
echo     from selenium.webdriver.chrome.options import Options >> "%SCRIPT_FILE%"
echo     from selenium.webdriver.chrome.service import Service >> "%SCRIPT_FILE%"
echo     from webdriver_manager.chrome import ChromeDriverManager >> "%SCRIPT_FILE%"
echo     import requests >> "%SCRIPT_FILE%"
echo except ImportError: >> "%SCRIPT_FILE%"
echo     print('Installing required packages...') >> "%SCRIPT_FILE%"
echo     import subprocess >> "%SCRIPT_FILE%"
echo     subprocess.run([sys.executable, '-m', 'pip', 'install', 'selenium', 'requests', 'webdriver-manager']) >> "%SCRIPT_FILE%"
echo     from selenium import webdriver >> "%SCRIPT_FILE%"
echo     from selenium.webdriver.common.by import By >> "%SCRIPT_FILE%"
echo     from selenium.webdriver.chrome.options import Options >> "%SCRIPT_FILE%"
echo     from selenium.webdriver.chrome.service import Service >> "%SCRIPT_FILE%"
echo     from webdriver_manager.chrome import ChromeDriverManager >> "%SCRIPT_FILE%"
echo     import requests >> "%SCRIPT_FILE%"
echo # >> "%SCRIPT_FILE%"
REM Configuration constants of the generated script: upload endpoint, API key,
REM collection limit and the upload switch.
REM API_KEY is taken from the SF_COLLECTOR_API_KEY environment variable when it
REM is set, because this installer rewrites the script on every run and would
REM discard a key edited into the file. The previous literal stays as default.
echo # CONFIGURATION - EDIT THESE VALUES >> "%SCRIPT_FILE%"
echo SERVER_URL = 'https://cleankitchens.org/data/sf/api_receiver.php' >> "%SCRIPT_FILE%"
echo API_KEY = os.environ.get('SF_COLLECTOR_API_KEY', 'sk-sf-inspections-2025')  # Set SF_COLLECTOR_API_KEY or change this default >> "%SCRIPT_FILE%"
echo COLLECT_LIMIT = 25  # Set to None to collect all >> "%SCRIPT_FILE%"
echo UPLOAD_ENABLED = True  # Set to False to disable uploads >> "%SCRIPT_FILE%"
echo # >> "%SCRIPT_FILE%"
REM Output layout of the generated script: a timestamped sf_data_ folder under
REM the current working directory with json and pdfs subfolders.
REM The percent signs of the strftime format are doubled so that the batch
REM parser writes one literal percent sign for each pair.
echo # Create data folder with timestamp >> "%SCRIPT_FILE%"
echo timestamp = datetime.now().strftime('%%Y%%m%%d_%%H%%M%%S') >> "%SCRIPT_FILE%"
echo data_dir = os.path.join(os.getcwd(), f'sf_data_{timestamp}') >> "%SCRIPT_FILE%"
echo os.makedirs(data_dir, exist_ok=True) >> "%SCRIPT_FILE%"
echo os.makedirs(os.path.join(data_dir, 'json'), exist_ok=True) >> "%SCRIPT_FILE%"
echo os.makedirs(os.path.join(data_dir, 'pdfs'), exist_ok=True) >> "%SCRIPT_FILE%"
echo # >> "%SCRIPT_FILE%"
REM Chrome options of the generated script: logging-related switches plus a
REM prefs dictionary that sets the default download directory to the pdfs
REM subfolder, turns the download prompt off and sets the preferences the
REM generated comment describes as auto-downloading PDFs.
echo print('Starting Chrome browser with download settings...') >> "%SCRIPT_FILE%"
echo options = Options() >> "%SCRIPT_FILE%"
echo options.add_argument('--log-level=3') >> "%SCRIPT_FILE%"
echo options.add_argument('--disable-logging') >> "%SCRIPT_FILE%"
echo options.add_experimental_option('excludeSwitches', ['enable-logging']) >> "%SCRIPT_FILE%"
echo # Set download preferences to auto-download PDFs >> "%SCRIPT_FILE%"
echo prefs = { >> "%SCRIPT_FILE%"
echo     'download.default_directory': os.path.join(data_dir, 'pdfs'), >> "%SCRIPT_FILE%"
echo     'download.prompt_for_download': False, >> "%SCRIPT_FILE%"
echo     'download.directory_upgrade': True, >> "%SCRIPT_FILE%"
echo     'plugins.always_open_pdf_externally': True, >> "%SCRIPT_FILE%"
echo     'safebrowsing.enabled': False >> "%SCRIPT_FILE%"
echo } >> "%SCRIPT_FILE%"
echo options.add_experimental_option('prefs', prefs) >> "%SCRIPT_FILE%"
echo # >> "%SCRIPT_FILE%"
REM Browser start-up of the generated script: build a Service from the driver
REM path returned by ChromeDriverManager, start Chrome with the options above
REM and send the Page.setDownloadBehavior DevTools command with the pdfs folder
REM as download path. On any exception the script prints the error, waits for
REM Enter and exits with status 1.
echo try: >> "%SCRIPT_FILE%"
echo     service = Service(ChromeDriverManager().install()) >> "%SCRIPT_FILE%"
echo     service.log_path = os.devnull >> "%SCRIPT_FILE%"
echo     driver = webdriver.Chrome(service=service, options=options) >> "%SCRIPT_FILE%"
echo     # Enable Chrome DevTools Protocol for download handling >> "%SCRIPT_FILE%"
echo     driver.execute_cdp_cmd('Page.setDownloadBehavior', { >> "%SCRIPT_FILE%"
echo         'behavior': 'allow', >> "%SCRIPT_FILE%"
echo         'downloadPath': os.path.join(data_dir, 'pdfs') >> "%SCRIPT_FILE%"
echo     }) >> "%SCRIPT_FILE%"
echo     print('Browser started successfully!') >> "%SCRIPT_FILE%"
echo except Exception as e: >> "%SCRIPT_FILE%"
echo     print(f'Error starting Chrome: {e}') >> "%SCRIPT_FILE%"
echo     input('Press Enter to exit...') >> "%SCRIPT_FILE%"
echo     sys.exit(1) >> "%SCRIPT_FILE%"
echo # >> "%SCRIPT_FILE%"
REM Site access of the generated script: load the San Francisco inspection
REM listing, wait 3 seconds, and when the page title contains 403 quit the
REM browser, wait for Enter and exit with status 1.
echo print('Accessing SF inspection site...') >> "%SCRIPT_FILE%"
echo driver.get('https://inspections.myhealthdepartment.com/san-francisco') >> "%SCRIPT_FILE%"
echo time.sleep(3) >> "%SCRIPT_FILE%"
echo # >> "%SCRIPT_FILE%"
echo if '403' in driver.title: >> "%SCRIPT_FILE%"
echo     print('ERROR: Site blocked access') >> "%SCRIPT_FILE%"
echo     driver.quit() >> "%SCRIPT_FILE%"
echo     input('Press Enter to exit...') >> "%SCRIPT_FILE%"
echo     sys.exit(1) >> "%SCRIPT_FILE%"
echo # >> "%SCRIPT_FILE%"
echo print('Site accessed successfully!') >> "%SCRIPT_FILE%"
echo print('Finding inspections...') >> "%SCRIPT_FILE%"
echo print() >> "%SCRIPT_FILE%"
echo # >> "%SCRIPT_FILE%"
REM Link discovery of the generated script: keep every anchor whose href
REM contains the inspectionID marker and whose link text is not empty. The id
REM is everything after the first occurrence of that marker in the href.
REM NOTE(review): the id would include any further query parameters that
REM follow it in the href - confirm against the live page.
REM The script exits with status 0 when no inspection is found and trims the
REM list to COLLECT_LIMIT when that is set. The greater-than signs are
REM caret-escaped so that cmd does not treat them as redirection.
echo inspections = [] >> "%SCRIPT_FILE%"
echo links = driver.find_elements(By.TAG_NAME, 'a') >> "%SCRIPT_FILE%"
echo # >> "%SCRIPT_FILE%"
echo for link in links: >> "%SCRIPT_FILE%"
echo     href = link.get_attribute('href') or '' >> "%SCRIPT_FILE%"
echo     if 'inspectionID=' in href: >> "%SCRIPT_FILE%"
echo         name = link.text.strip() >> "%SCRIPT_FILE%"
echo         if name: >> "%SCRIPT_FILE%"
echo             parts = href.split('inspectionID=') >> "%SCRIPT_FILE%"
echo             if len(parts) ^> 1: >> "%SCRIPT_FILE%"
echo                 inspection_id = parts[1] >> "%SCRIPT_FILE%"
echo                 inspections.append({'id': inspection_id, 'url': href, 'name': name}) >> "%SCRIPT_FILE%"
echo # >> "%SCRIPT_FILE%"
echo print(f'Found {len(inspections)} inspections') >> "%SCRIPT_FILE%"
echo # >> "%SCRIPT_FILE%"
echo if not inspections: >> "%SCRIPT_FILE%"
echo     print('No inspections found on page') >> "%SCRIPT_FILE%"
echo     driver.quit() >> "%SCRIPT_FILE%"
echo     input('Press Enter to exit...') >> "%SCRIPT_FILE%"
echo     sys.exit(0) >> "%SCRIPT_FILE%"
echo # >> "%SCRIPT_FILE%"
echo if COLLECT_LIMIT and len(inspections) ^> COLLECT_LIMIT: >> "%SCRIPT_FILE%"
echo     inspections = inspections[:COLLECT_LIMIT] >> "%SCRIPT_FILE%"
echo     print(f'Limiting to first {COLLECT_LIMIT} inspections') >> "%SCRIPT_FILE%"
echo # >> "%SCRIPT_FILE%"
REM Progress header of the generated script and the three counters that its
REM final summary prints: collected, pdfs_downloaded and uploaded.
echo print() >> "%SCRIPT_FILE%"
echo print('Starting collection...') >> "%SCRIPT_FILE%"
echo print('-'*50) >> "%SCRIPT_FILE%"
echo # >> "%SCRIPT_FILE%"
echo collected = 0 >> "%SCRIPT_FILE%"
echo pdfs_downloaded = 0 >> "%SCRIPT_FILE%"
echo uploaded = 0 >> "%SCRIPT_FILE%"
echo # >> "%SCRIPT_FILE%"
REM upload_to_server(data, pdf_path=None) in the generated script:
REM   - returns False at once when UPLOAD_ENABLED is false;
REM   - builds a payload with the inspection id, facility name and full record,
REM     adding the PDF base64-encoded with its file name when pdf_path exists;
REM   - POSTs the payload as JSON to SERVER_URL with the X-API-Key header and a
REM     30 second timeout;
REM   - returns True only for HTTP 200 with a truthy success field in the JSON
REM     reply, and False otherwise;
REM   - prints the first 50 characters of any exception and returns False.
echo def upload_to_server(data, pdf_path=None): >> "%SCRIPT_FILE%"
echo     """Upload inspection data and optional PDF to server""" >> "%SCRIPT_FILE%"
echo     if not UPLOAD_ENABLED: >> "%SCRIPT_FILE%"
echo         return False >> "%SCRIPT_FILE%"
echo     try: >> "%SCRIPT_FILE%"
echo         payload = { >> "%SCRIPT_FILE%"
echo             'action': 'upload_inspection', >> "%SCRIPT_FILE%"
echo             'inspection_id': data['inspection_id'], >> "%SCRIPT_FILE%"
echo             'facility_name': data['facility_name'], >> "%SCRIPT_FILE%"
echo             'inspection_data': data >> "%SCRIPT_FILE%"
echo         } >> "%SCRIPT_FILE%"
echo         # Add PDF if available >> "%SCRIPT_FILE%"
echo         if pdf_path and os.path.exists(pdf_path): >> "%SCRIPT_FILE%"
echo             with open(pdf_path, 'rb') as f: >> "%SCRIPT_FILE%"
echo                 pdf_content = f.read() >> "%SCRIPT_FILE%"
echo                 payload['pdf_base64'] = base64.b64encode(pdf_content).decode('utf-8') >> "%SCRIPT_FILE%"
echo                 payload['pdf_filename'] = os.path.basename(pdf_path) >> "%SCRIPT_FILE%"
echo         # Send to server >> "%SCRIPT_FILE%"
echo         headers = {'X-API-Key': API_KEY, 'Content-Type': 'application/json'} >> "%SCRIPT_FILE%"
echo         response = requests.post(SERVER_URL, json=payload, headers=headers, timeout=30) >> "%SCRIPT_FILE%"
echo         if response.status_code == 200: >> "%SCRIPT_FILE%"
echo             result = response.json() >> "%SCRIPT_FILE%"
echo             if result.get('success'): >> "%SCRIPT_FILE%"
echo                 return True >> "%SCRIPT_FILE%"
echo     except Exception as e: >> "%SCRIPT_FILE%"
echo         print(f'    Upload error: {str(e)[:50]}') >> "%SCRIPT_FILE%"
echo     return False >> "%SCRIPT_FILE%"
echo # >> "%SCRIPT_FILE%"
REM Per-inspection loop of the generated script: scrape the inspection page,
REM download its printable PDF, save the record as JSON and upload both.
REM Any exception inside one iteration is printed and the loop continues.
REM
REM PDF handling fix: the pdfs folder is listed before and after the download
REM and only a file that appeared in between is claimed. Previously the newest
REM PDF in the folder was renamed even when it belonged to an earlier
REM inspection and the current download had failed, and no failure message was
REM printed in that case. os.replace is used so that a repeated inspection id
REM overwrites its file instead of raising on Windows, and a download that
REM already carries the target name is now counted as downloaded.
REM The ampersands of the PDF URL are caret-escaped for cmd.
echo import glob >> "%SCRIPT_FILE%"
echo for i, insp in enumerate(inspections, 1): >> "%SCRIPT_FILE%"
echo     try: >> "%SCRIPT_FILE%"
echo         print(f'\n[{i}/{len(inspections)}] Processing: {insp["name"][:50]}') >> "%SCRIPT_FILE%"
echo         # First get the inspection page data >> "%SCRIPT_FILE%"
echo         driver.get(insp['url']) >> "%SCRIPT_FILE%"
echo         time.sleep(2) >> "%SCRIPT_FILE%"
echo         inspection_page_text = driver.find_element(By.TAG_NAME, 'body').text >> "%SCRIPT_FILE%"
echo         print('  - Inspection page scraped') >> "%SCRIPT_FILE%"
echo         # >> "%SCRIPT_FILE%"
echo         # Build PDF URL and navigate to it >> "%SCRIPT_FILE%"
echo         pdf_url = f'https://inspections.myhealthdepartment.com/san-francisco/print/?task=getPrintable^&path=san-francisco^&pKey={insp["id"]},{insp["id"]}' >> "%SCRIPT_FILE%"
echo         print(f'  - Downloading PDF...') >> "%SCRIPT_FILE%"
echo         # >> "%SCRIPT_FILE%"
echo         # Snapshot the PDFs already present so only a new file is claimed >> "%SCRIPT_FILE%"
echo         pdf_dir = os.path.join(data_dir, 'pdfs') >> "%SCRIPT_FILE%"
echo         pdf_path = os.path.join(pdf_dir, f'{insp["id"]}.pdf') >> "%SCRIPT_FILE%"
echo         pdf_pattern = os.path.join(pdf_dir, '*.pdf') >> "%SCRIPT_FILE%"
echo         known_pdfs = set(glob.glob(pdf_pattern)) >> "%SCRIPT_FILE%"
echo         # >> "%SCRIPT_FILE%"
echo         # Navigate to PDF URL - Chrome will auto-download >> "%SCRIPT_FILE%"
echo         driver.get(pdf_url) >> "%SCRIPT_FILE%"
echo         time.sleep(5)  # Wait for download >> "%SCRIPT_FILE%"
echo         # >> "%SCRIPT_FILE%"
echo         # Claim the newest PDF that appeared during this download >> "%SCRIPT_FILE%"
echo         pdf_downloaded = False >> "%SCRIPT_FILE%"
echo         new_pdfs = [p for p in glob.glob(pdf_pattern) if p not in known_pdfs] >> "%SCRIPT_FILE%"
echo         if new_pdfs: >> "%SCRIPT_FILE%"
echo             latest_pdf = max(new_pdfs, key=os.path.getctime) >> "%SCRIPT_FILE%"
echo             already_named = os.path.basename(latest_pdf) == os.path.basename(pdf_path) >> "%SCRIPT_FILE%"
echo             if not already_named: >> "%SCRIPT_FILE%"
echo                 os.replace(latest_pdf, pdf_path) >> "%SCRIPT_FILE%"
echo             print(f'  - PDF saved as {insp["id"]}.pdf') >> "%SCRIPT_FILE%"
echo             pdfs_downloaded += 1 >> "%SCRIPT_FILE%"
echo             pdf_downloaded = True >> "%SCRIPT_FILE%"
echo         else: >> "%SCRIPT_FILE%"
echo             print('  - PDF download failed') >> "%SCRIPT_FILE%"
echo         # >> "%SCRIPT_FILE%"
echo         # Save inspection data as JSON >> "%SCRIPT_FILE%"
echo         data = { >> "%SCRIPT_FILE%"
echo             'inspection_id': insp['id'], >> "%SCRIPT_FILE%"
echo             'facility_name': insp['name'], >> "%SCRIPT_FILE%"
echo             'inspection_page_text': inspection_page_text[:10000], >> "%SCRIPT_FILE%"
echo             'pdf_url': pdf_url, >> "%SCRIPT_FILE%"
echo             'pdf_downloaded': pdf_downloaded, >> "%SCRIPT_FILE%"
echo             'collected_at': datetime.now().isoformat(), >> "%SCRIPT_FILE%"
echo             'url': insp['url'] >> "%SCRIPT_FILE%"
echo         } >> "%SCRIPT_FILE%"
echo         json_file = os.path.join(data_dir, 'json', f'{insp["id"]}.json') >> "%SCRIPT_FILE%"
echo         with open(json_file, 'w', encoding='utf-8') as f: >> "%SCRIPT_FILE%"
echo             json.dump(data, f, indent=2, ensure_ascii=False) >> "%SCRIPT_FILE%"
echo         print('  - Data saved locally') >> "%SCRIPT_FILE%"
echo         collected += 1 >> "%SCRIPT_FILE%"
echo         # >> "%SCRIPT_FILE%"
echo         # Upload to server >> "%SCRIPT_FILE%"
echo         if UPLOAD_ENABLED: >> "%SCRIPT_FILE%"
echo             if upload_to_server(data, pdf_path if pdf_downloaded else None): >> "%SCRIPT_FILE%"
echo                 print('  - Uploaded to server successfully!') >> "%SCRIPT_FILE%"
echo                 uploaded += 1 >> "%SCRIPT_FILE%"
echo             else: >> "%SCRIPT_FILE%"
echo                 print('  - Server upload failed') >> "%SCRIPT_FILE%"
echo         # >> "%SCRIPT_FILE%"
echo     except Exception as e: >> "%SCRIPT_FILE%"
echo         print(f'  ERROR: {str(e)[:100]}') >> "%SCRIPT_FILE%"
echo # >> "%SCRIPT_FILE%"
REM Shutdown and summary of the generated script: quit the browser, print the
REM counters and the absolute data folder path, try to open that folder with
REM os.startfile while ignoring any failure, then wait for Enter.
echo driver.quit() >> "%SCRIPT_FILE%"
echo print() >> "%SCRIPT_FILE%"
echo print('='*50) >> "%SCRIPT_FILE%"
echo print('COLLECTION COMPLETE') >> "%SCRIPT_FILE%"
echo print('='*50) >> "%SCRIPT_FILE%"
echo print(f'Inspections collected: {collected}/{len(inspections)}') >> "%SCRIPT_FILE%"
echo print(f'PDFs downloaded: {pdfs_downloaded}') >> "%SCRIPT_FILE%"
echo if UPLOAD_ENABLED: >> "%SCRIPT_FILE%"
echo     print(f'Uploaded to server: {uploaded}') >> "%SCRIPT_FILE%"
echo print(f'Data saved locally to: {os.path.abspath(data_dir)}') >> "%SCRIPT_FILE%"
echo print() >> "%SCRIPT_FILE%"
echo # Open the folder >> "%SCRIPT_FILE%"
echo try: >> "%SCRIPT_FILE%"
echo     os.startfile(os.path.abspath(data_dir)) >> "%SCRIPT_FILE%"
echo except: >> "%SCRIPT_FILE%"
echo     pass >> "%SCRIPT_FILE%"
echo input('Press Enter to exit...') >> "%SCRIPT_FILE%"

REM Create desktop shortcut
REM The Desktop folder is not always under USERPROFILE: with OneDrive folder
REM backup it lives under the OneDrive root instead. Prefer the classic
REM location and fall back to the OneDrive one when only that exists.
echo Creating desktop shortcut...
set "SHORTCUT_DIR=%USERPROFILE%\Desktop"
if not exist "%SHORTCUT_DIR%\" if defined OneDrive if exist "%OneDrive%\Desktop\" set "SHORTCUT_DIR=%OneDrive%\Desktop"

REM The launcher changes to INSTALL_DIR, runs the collector with the resolved
REM interpreter and pauses afterwards. Paths are expanded now, at install time.
(
echo @echo off
echo title SF Collector with Upload
echo cd /d "%INSTALL_DIR%"
echo "%PYTHON_EXE%" "%SCRIPT_FILE%"
echo pause
) > "%SHORTCUT_DIR%\SF_Collector_Upload.bat"

REM Install packages
REM pip output stays hidden as before, but a failed install is now reported
REM instead of being discarded together with the output. The generated
REM collector script runs pip again itself when its imports fail.
echo Installing required packages...
"%PYTHON_EXE%" -m pip install selenium requests webdriver-manager >nul 2>&1
if errorlevel 1 echo WARNING: Package installation failed; the collector will retry when it starts.

REM Installation summary shown before the collector is launched.
echo(
echo ============================================
echo   INSTALLATION COMPLETE!
echo ============================================
echo(
echo Installed to: %INSTALL_DIR%
echo Desktop shortcut created: SF_Collector_Upload
echo(
echo This version will:
echo   - Download PDFs locally
echo   - Upload data to your server
echo(
echo IMPORTANT: Edit the script to set your API key!
echo Location: %SCRIPT_FILE%
echo(
REM Five second countdown with the output of timeout hidden.
echo Starting collector in 5 seconds...
timeout /t 5 > nul

REM Run the collector
REM The generated script creates its sf_data_ folder under the current
REM directory, so switch to INSTALL_DIR before starting it.
echo(
echo Running collector with upload...
chdir /d "%INSTALL_DIR%"
"%PYTHON_EXE%" "%SCRIPT_FILE%"

REM Keep the window open until a key is pressed; the prompt of pause is hidden.
echo(
echo Press any key to close...
pause > nul