/ .env.example
.env.example
1 # ───────────────────────────────────────────────────────────────────────────── 2 # .env — Core pipeline and operational configuration 3 # ───────────────────────────────────────────────────────────────────────────── 4 # This is the main config file. API keys live in .env.secrets, agent system 5 # config lives in .env.agents. All three are loaded by src/utils/load-env.js. 6 # 7 # Values set in .env take priority over .env.secrets and .env.agents. 8 9 # ─── ZenRows Configuration ────────────────────────────────────────────────── 10 # Vendor limits: Free=5 concurrent/1,000 req/day; Developer=10/unlimited; Business=100/unlimited 11 # Default: 19 (5% below a plan limit of 20 concurrent) — increase for higher-tier plans. NOTE(review): 20 matches none of the tiers listed above; confirm the current plan's concurrency limit 12 ZENROWS_CONCURRENCY=19 13 # Legacy: daily request quota (Free/Developer: 1,000/day; 0 disabled the cap) — this described the setting removed below. 14 # ZENROWS_DAILY_LIMIT removed — monthly subscription has no daily limit (confirmed 2026-03-06) 15 # Enable premium proxy for geo-targeting (required for multi-country support) 16 ZENROWS_PREMIUM=false 17 # ZenRows subscription cost for status cost display (flat monthly, not per-request) 18 ZENROWS_MONTHLY_COST=69.99 19 # ZenRows billing cycle day-of-month (billing date each month) 20 ZENROWS_BILLING_DAY=3 21 # Countries needing extended ZenRows timeouts (comma-separated ISO codes) 22 # Known slow: JP (Japan), ID (Indonesia) — add others as observed via logs 23 ZENROWS_SLOW_COUNTRIES=JP,ID 24 ZENROWS_SLOW_TIMEOUT=300000 25 26 # ─── Claude Model Tiers ────────────────────────────────────────────────────── 27 # Update these to upgrade all Claude usage at once (OpenRouter format) 28 CLAUDE_SONNET_MODEL=anthropic/claude-sonnet-4-6 29 CLAUDE_HAIKU_MODEL=anthropic/claude-haiku-4-5 30 CLAUDE_OPUS_MODEL=anthropic/claude-opus-4 31 32 # ─── LLM Model Configuration ──────────────────────────────────────────────── 33 # OpenRouter format: provider/model-name (works with both OpenRouter and Anthropic) 34 SCORING_MODEL=openai/gpt-4o-mini # Initial AI vision 
scoring of screenshots 35 PROPOSAL_MODEL=anthropic/claude-haiku-4-5 # Generate personalised proposals 36 POLISH_MODEL=google/gemini-2.0-flash-001 # Polish/grammar-check proposals (cheap, fast) 37 ENRICHMENT_MODEL=openai/gpt-4o-mini # Extract contact info from web pages 38 VISION_MODEL=openai/gpt-4o-mini # General-purpose vision model 39 CLASSIFICATION_MODEL=anthropic/claude-haiku-4-5 # Reply classification (fast, cheap) 40 AUDIT_REPORT_MODEL=anthropic/claude-opus-4 # Premium audit reports (Opus + extended thinking) 41 42 # Sage AI Auto-Fix (optional — only needed for automated quality fixes) 43 #SAGE_AUTOFIX_MODEL=claude-sonnet-4-6 44 #SAGE_AUTOFIX_BRANCH=sage-autofix 45 46 # ─── OpenRouter Credit Monitoring ──────────────────────────────────────────── 47 # Alert threshold in USD (warning when low, critical when exhausted) 48 OPENROUTER_CREDIT_THRESHOLD=10.0 49 50 # ─── LLM Cost Guardrails ──────────────────────────────────────────────────── 51 # Daily spending cap in USD (hard limit, blocks calls when exceeded) 52 LLM_DAILY_BUDGET=50 53 # Hourly spending alert threshold in USD (warning only, does not block) 54 LLM_HOURLY_ALERT=10 55 56 # ─── Scoring & Proposals ──────────────────────────────────────────────────── 57 # Sites scoring at or below this are eligible for proposals 58 # B (82) and below: B- (70-81), C (50-69), D (30-49), E (0-29) 59 LOW_SCORE_CUTOFF=82 60 61 # Only mention competitor score if it's at least this many points higher 62 COMPETITOR_SCORE_THRESHOLD=10 63 64 # 'true' = LLM proposals ($0.18/site via OpenRouter), 'false' = template-based (zero cost) 65 USE_LLM_PROPOSALS=true 66 67 # Maximum related keywords to fetch per seed keyword (DataForSEO) 68 KEYWORD_EXPANSION_LIMIT=50 69 70 # ─── Vision & Screenshot Configuration ────────────────────────────────────── 71 # true: Full pipeline with screenshots + vision analysis → assets → scoring → rescoring → enrich 72 # ⚠️ INCURS LLM API COSTS: ~$0.030/site via OpenRouter (outside Claude Max subscription) 
73 # false: HTML-only mode, no screenshots, skip rescoring (83% cost savings — recommended) 74 ENABLE_VISION=true 75 76 # LLM Scoring: false = use programmatic rule-based scoring (no API cost) 77 # true = use LLM scoring via OpenRouter (GPT-4o-mini). 78 # Claude Max orchestrator handles the zero-cost path separately. 79 # Default: true (LLM scoring enabled) 80 ENABLE_LLM_SCORING=true 81 82 # LLM Enrichment: false = regex-only contact extraction from HTML (no API cost) 83 # true = use LLM for location/business name extraction via OpenRouter. 84 # Claude Max orchestrator handles the zero-cost path separately. 85 # Default: true (LLM enrichment enabled) 86 ENABLE_ENRICHMENT_LLM=true 87 88 # DEPRECATED: Use ENABLE_VISION instead (ignored if ENABLE_VISION is set) 89 # USE_COMPUTER_VISION_SCORING=true 90 # USE_COMPUTER_VISION_RESCORING=true 91 # USE_COMPUTER_VISION_ENRICHMENT=true 92 # ENABLE_SCREENSHOT_CAPTURE=true 93 94 # ─── API Rate Limits ──────────────────────────────────────────────────────── 95 # Throughput set safely below vendor and regulatory limits. 96 # Twilio is 30% below its long-code limit and Resend 25% below its enforced plan limit; OpenRouter and DataForSEO are ~3% below; ZeroBounce is ~60% below. 97 # See src/utils/rate-limiter.js for where these are applied. 
98 99 # OpenRouter: 200 RPM paid plan; 20 RPM free tier 100 OPENROUTER_REQUESTS_PER_MINUTE=194 101 OPENROUTER_MAX_CONCURRENT=5 102 103 # Twilio SMS: Long code = 1 SMS/sec; Toll-free = 3 SMS/sec; Short code = 100 SMS/sec 104 # TCPA/A2P compliance enforced separately in compliance.js 105 TWILIO_REQUESTS_PER_SECOND=0.7 106 TWILIO_MAX_CONCURRENT=1 107 # Pre-tracking SMS spend not recorded in DB — added to lifetime cost estimate in npm run status 108 # Set to actual spend before tracking began (default $7.30 = Feb spend before tracking started) 109 TWILIO_COST_OFFSET_USD=7.30 110 111 # Resend: vendor docs state 10 req/sec on all paid plans, but the current plan enforces 2 req/sec — size this against the enforced limit 112 RESEND_REQUESTS_PER_SECOND=1.5 # actual plan limit: 2 req/sec (docs say 10 but plan enforces 2) 113 RESEND_MAX_CONCURRENT=1 114 115 # DataForSEO: 2,000 req/min; 100 concurrent 116 DATAFORSEO_REQUESTS_PER_MINUTE=1940 117 DATAFORSEO_MAX_CONCURRENT=97 118 119 # ZeroBounce email validation: ~100 req/sec vendor limit 120 # Set ZEROBOUNCE_ENABLED=false to skip validation (e.g. during testing) 121 # API key goes in .env.secrets — see ZEROBOUNCE_API_KEY entry there 122 ZEROBOUNCE_ENABLED=true 123 ZEROBOUNCE_REQUESTS_PER_SECOND=40 124 ZEROBOUNCE_MAX_CONCURRENT=5 125 ZEROBOUNCE_CACHE_TTL_DAYS=90 126 127 # ─── Sender Identity ──────────────────────────────────────────────────────── 128 SENDER_NAME=John Smith 129 SENDER_EMAIL=john@yourcompany.com 130 SENDER_PHONE=+1234567890 131 SENDER_COMPANY=Your Company Name 132 EMAIL_SIGNATURE=Best regards,\nJohn Smith\nYour Company Name 133 134 # CAN-SPAM Physical Address (required for commercial emails to US, CA, AU, NZ, UK, EU) 135 # Format: Street address, City, State/Province, Postal Code, Country 136 CAN_SPAM_PHYSICAL_ADDRESS= 137 138 # ─── Tracking & Unsubscribe ───────────────────────────────────────────────── 139 UNSUBSCRIBE_BASE_URL=https://yourdomain.com/unsubscribe 140 141 PAYPAL_BRAND_NAME=Audit&Fix 142 BASE_URL=http://localhost:3000 143 DEFAULT_PAYER_EMAIL=customer@example.com 144 145 # ─── Autoresponder 
─────────────────────────────────────────────────────────── 146 # LLM-powered auto-reply to inbound SMS/email messages (Claude Opus via OpenRouter) 147 # true = auto-send replies without human approval; false = disable autoresponder entirely 148 AUTORESPONDER_ENABLED=true 149 150 # ─── Audit&Fix Sales Page ──────────────────────────────────────────────────── 151 # Cloudflare Worker (auditandfix-api) — bridges PHP sales page with 333Method 152 AUDITANDFIX_WORKER_URL=https://auditandfix-api.auditandfix.workers.dev 153 AUDITANDFIX_WORKER_SECRET=your-shared-secret 154 # auditandfix.com public URL (used to build /o/{site_id} short order links in SMS replies) 155 AUDITANDFIX_URL=https://auditandfix.com 156 157 # PayPal live credentials (set in PHP server environment, not here) 158 # PAYPAL_MODE=live 159 # PAYPAL_CLIENT_ID=your-live-client-id 160 # PAYPAL_CLIENT_SECRET=your-live-client-secret 161 162 # PayPal sandbox credentials — used when ?sandbox=1 param is present (E2E testing) 163 PAYPAL_SANDBOX_CLIENT_ID=your-sandbox-client-id 164 PAYPAL_SANDBOX_CLIENT_SECRET=your-sandbox-client-secret 165 PAYPAL_SANDBOX_BUYER_EMAIL=sandbox-buyer@personal.example.com 166 PAYPAL_SANDBOX_BUYER_PASSWORD=sandbox-buyer-password 167 168 # PayPal webhook verification (add as Cloudflare Worker secret: wrangler secret put PAYPAL_WEBHOOK_ID) 169 PAYPAL_WEBHOOK_ID=your-webhook-id 170 171 # PayPal worker internal auth secret (add as Cloudflare Worker secret: wrangler secret put PAYPAL_WORKER_SECRET) 172 # Used to authenticate GET/DELETE /paypal-events.json calls from local pipeline poller 173 PAYPAL_WORKER_SECRET=your-paypal-worker-secret 174 175 # Resend webhook Worker URL (deployed via wrangler) 176 # Prod: https://resend-webhook-worker.auditandfix.workers.dev 177 # Register in Resend dashboard → Settings → Webhooks 178 # Resend webhook verification (add as Cloudflare Worker secret: wrangler secret put RESEND_WEBHOOK_SECRET) 179 # Format: whsec_... 
(from Resend dashboard → Webhooks → Signing Secret) 180 RESEND_WEBHOOK_SECRET=whsec_your-signing-secret 181 182 # Resend worker internal auth secret (add as Cloudflare Worker secret: wrangler secret put RESEND_WORKER_SECRET) 183 # Used to authenticate GET/POST/DELETE /email-events.json calls from local pipeline 184 RESEND_WORKER_SECRET=your-resend-worker-secret 185 186 # Sender email for purchase confirmation and report delivery 187 AUDITANDFIX_SENDER_EMAIL=reports@auditandfix.com 188 189 # Legal contact email (shown in Privacy Policy, Terms, Impressum, Cookie Policy) 190 LEGALS_EMAIL=legals@auditandfix.com 191 192 # Business postal address (shown in Privacy Policy, Terms, Impressum) 193 # Format: Street, Suburb NSW POSTCODE, Australia 194 BUSINESS_ADDRESS=NSW, Australia 195 196 # Legal operator name (shown in Impressum as "Verantwortliche Person / Responsible Person") 197 OPERATOR_NAME=Your Name 198 199 # ─── Database & Storage ───────────────────────────────────────────────────── 200 DATABASE_PATH=./db/sites.db 201 SCREENSHOT_BASE_PATH=./screenshots 202 203 # ─── Cron Circuit Breaker ─────────────────────────────────────────────────── 204 # Fallback if not set in settings table. Dashboard can toggle dynamically. 205 # Set to 'false' to disable all cron jobs (useful for maintenance/debugging). 206 CRON_CIRCUIT_BREAKER_ENABLED=true 207 208 # ─── Pipeline Stage Control ───────────────────────────────────────────────── 209 # Skip stages (comma-separated): serps, assets, enrich, proposals, outreach, replies 210 # Note: scoring and rescoring are orchestrator-only — not valid pipeline stage names. 211 # API quota stages (serps, etc) self-manage via circuit breakers — don't list them here. 
212 # Restart service after changing: systemctl --user restart 333method-pipeline 213 SKIP_STAGES= 214 215 # Skip outreach methods (comma-separated): sms, email, form, x, linkedin 216 OUTREACH_SKIP_METHODS= 217 218 # Block outreach to specific countries (comma-separated ISO codes) 219 # Use this to exclude countries with unresolved compliance requirements. 220 # 221 # GDPR countries (need Legitimate Interests Assessment on file): 222 # DE,FR,IT,ES,NL,BE,AT,SE,DK,NO,IE,PL,PT,FI,CZ,HU,RO,BG,HR,SK,SI,LT,LV,EE,LU,MT,CY,GR 223 # UK (UK GDPR / ICO — needs separate LIA under UK GDPR): GB 224 # 225 # Remove countries from this list once their compliance requirements are met. 226 # See docs/09-business/auditandfix-business-plan.md Risk Management section for rationale. 227 OUTREACH_BLOCKED_COUNTRIES=DE,FR,IT,ES,NL,BE,AT,SE,DK,NO,IE,PL,GB 228 229 # Block SMS outreach to specific countries (comma-separated ISO codes) 230 # Separate from OUTREACH_BLOCKED_COUNTRIES which blocks ALL channels. 231 # TCPA: US/CA cold SMS requires prior express written consent for telemarketing. 232 # Facebook v. Duguid (2021) defense may apply but untested for our use case. 233 # Email/form outreach to US/CA is NOT blocked. 234 OUTREACH_BLOCKED_SMS_COUNTRIES=US,CA 235 236 # Sites processed per cycle, per stage. Default: 5. Increase to clear backlogs (50–100). 237 PIPELINE_BATCH_MAX_SIZE=200 238 # PIPELINE_BATCH_MIN_THRESHOLD=5 # skip stage if backlog below this (avoids spin-up overhead) 239 240 # Delay between pipeline stage cycles in milliseconds (default: 1000) 241 PIPELINE_CYCLE_DELAY_MS=1000 242 243 # Interval for checking if pipeline is manually paused in milliseconds (default: 5000) 244 PIPELINE_PAUSE_CHECK_MS=5000 245 246 # ─── Stage Concurrency ────────────────────────────────────────────────────── 247 # Max parallel operations per stage. Can be adjusted live — takes effect after each site. 248 # Browser stages use adaptive scaling (reduces automatically under high load). 
249 BROWSER_CONCURRENCY=5 # Max concurrent browser instances for Assets stage 250 ENRICHMENT_CONCURRENCY=5 # Max concurrent browser instances for Enrich stage 251 SCORING_CONCURRENCY=5 # Max concurrent API calls for Scoring stage 252 253 # Screen-aware throttling: tighter thresholds when physical monitor is on (user present) 254 # Screen OFF (AFK): ease=0.4, max=0.8 → Screen ON (user present): defaults below 255 SCREEN_ON_EASE_LOAD=0.2 # Normalised load below which full concurrency is used 256 SCREEN_ON_MAX_LOAD=0.5 # Normalised load above which minimum concurrency is used 257 258 # CPU gate for browser loop: pause all browser work if instantaneous CPU exceeds this 259 # Uses real-time 200ms CPU samples — much faster than loadavg (30-60s lag) 260 BROWSER_CPU_GATE=0.80 261 262 # ─── Browser Configuration ────────────────────────────────────────────────── 263 # Optional: Override Chromium path (auto-detected if not set) 264 # Use ./scripts/chromium-nice to run chromium at low priority (nice -n 19) 265 # CHROMIUM_PATH=./scripts/chromium-nice 266 267 # NixOS: Set custom browser path if Playwright can't write to Nix store 268 # PLAYWRIGHT_BROWSERS_PATH=/home/username/.cache/ms-playwright 269 270 # Separate user data directory prevents lock file conflicts with daily browser 271 # PLAYWRIGHT_USER_DATA_DIR=./.playwright-userdata 272 273 # ─── CAPTCHA Solving (NopeCHA) ─────────────────────────────────────────────── 274 # Optional: auto-solve reCAPTCHA v2, hCaptcha, and Cloudflare Turnstile on contact forms 275 # CAPTCHA solving providers — fastest provider used first (benchmarked every 30min via cron) 276 # If not set, CAPTCHAs require manual solving by the operator 277 # NopeCHA: https://nopecha.com — 100 solves/day free, $5/mth for 2000/day 278 # NOPECHA_API_KEY=your-nopecha-api-key 279 # NOPECHA_API_KEY_2=your-second-nopecha-api-key # round-robins with primary key 280 # CapMonster: https://capmonster.cloud — backup provider, cheaper than anti-captcha 281 # 
CAPMONSTER_API_KEY=your-capmonster-api-key 282 283 # ─── Browser Profiles (X & LinkedIn persistent sessions) ───────────────────── 284 # Profiles store cookies/storage so you don't need to re-login every run 285 BROWSER_PROFILES_DIR=./.browser-profiles 286 X_PROFILE_COUNT=3 287 LINKEDIN_PROFILE_COUNT=3 288 289 # ─── Timezone & Locale ────────────────────────────────────────────────────── 290 # Timezone should match your IP location to avoid fingerprint inconsistencies 291 TIMEZONE=Australia/Sydney 292 # Examples: en-AU,en;q=0.9 (Australian), en-US,en;q=0.9 (American) 293 ACCEPT_LANGUAGE=en-AU,en;q=0.9 294 295 # ─── X (Twitter) Automation XPath Selectors ────────────────────────────────── 296 # These may change over time as X updates their UI 297 X_LOGIN_BUTTON_XPATH=//div[contains(@style, "position: absolute; bottom: 0px; width: 100%")]//span[text()="Log in"] 298 X_PROFILE_LINK_XPATH=//a[@aria-label="Profile"] 299 X_USERNAME_DIV_XPATH=//div[@data-testid="UserName"] 300 X_MESSAGE_BUTTON_XPATH=//button[@aria-label="Message"] 301 302 # ─── Free Website Scanner (Inbound Funnel) ─────────────────────────────────── 303 # Scoring runs in the Cloudflare Worker (auditandfix-api) — no local server needed. 304 # The NixOS poll daemon (npm run score-api) pulls scans from Worker KV into SQLite. 305 # 306 # AUDITANDFIX_WORKER_URL and AUDITANDFIX_WORKER_SECRET are already defined above 307 # (shared with purchase queue — same Worker, same secret). 
308 # 309 # Also set in Hostinger .htaccess (same AUDITANDFIX_WORKER_URL value): 310 # SetEnv AUDITANDFIX_WORKER_URL https://auditandfix-api.auditandfix.workers.dev 311 312 # ─── Dashboard (optional) ─────────────────────────────────────────────────── 313 DASHBOARD_PORT=8501 314 DASHBOARD_REFRESH_INTERVAL=60 # Auto-refresh interval in seconds (0 = manual) 315 DASHBOARD_CACHE_TTL=300 # Query cache TTL in seconds 316 DASHBOARD_PAGE_SIZE=50 # Pagination size for tables 317 318 # ─── Debug ─────────────────────────────────────────────────────────────────── 319 # Uncomment to enable verbose debug logging in pipeline and agent loggers 320 # DEBUG=true 321 322 # ─── End-to-End Pipeline Test ──────────────────────────────────────────────── 323 # See .env.secrets for TEST_E2E_URL, TEST_E2E_PASSWORD 324 # No Resend/Twilio magic test addresses need configuration — they're hardcoded: 325 # Resend: delivered@resend.dev, bounced@resend.dev, complained@resend.dev 326 # Twilio: +15005550006 (valid), +15005550001 (invalid), +15005550007 (no SMS) 327 TEST_E2E_KEYWORD=site:yoursite.com/terms 328 329 # ─── Loopback SMS E2E Test (npm run test:e2e:sms) ──────────────────────────── 330 # Sends a real SMS from one Twilio number to a second, different Twilio number to verify the full send→receive loop. 331 # Throttled to once per 24 hours (gate file: /tmp/sms-loopback-gate.json). 332 # Override throttle: SMS_LOOPBACK_FORCE=1 npm run test:e2e:sms 333 # 334 # TWILIO_LOOPBACK_FROM — Twilio number that sends the test SMS (e.g. AU: +61468089949) 335 # TWILIO_LOOPBACK_TO — different Twilio number that receives it (e.g. 
CA: +18254794242) 336 # Note: FROM and TO must be different — Twilio rejects sending from a number to itself 337 TWILIO_LOOPBACK_FROM=+61468089949 338 TWILIO_LOOPBACK_TO=+18254794242 339 # Inbound email integration tests — point to test subdomain inbox to isolate from prod 340 # TEST_EMAIL_EVENTS_WORKER_URL is auto-applied during inbound E2E tests (reverts when done) 341 TEST_INBOUND_EMAIL=marcus@test.auditandfix.com 342 TEST_EMAIL_EVENTS_WORKER_URL=https://resend-webhook-worker-test.auditandfix.workers.dev