Cradicle Explorer

/ .env.example
.env.example
  1  # ─────────────────────────────────────────────────────────────────────────────
  2  # .env — Core pipeline and operational configuration
  3  # ─────────────────────────────────────────────────────────────────────────────
  4  # This is the main config file. API keys live in .env.secrets, agent system
  5  # config lives in .env.agents. All three are loaded by src/utils/load-env.js.
  6  #
  7  # Values set in .env take priority over .env.secrets and .env.agents.
  8  
  9  # ─── ZenRows Configuration ──────────────────────────────────────────────────
 10  # Vendor limits: Free=5 concurrent/1,000 req/day; Developer=10/unlimited; Business=100/unlimited
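 11  # Default: 19 (5% below a baseline of 20 concurrent) — increase for higher-tier plans. NOTE(review): the vendor limits on the previous line list Business=100, not 20 — confirm which plan this baseline refers to.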
 12  ZENROWS_CONCURRENCY=19
 13  # Former daily request quota (Free/Developer: 1,000/day; 0 disabled the cap) — no longer configurable, see next line.
 14  # ZENROWS_DAILY_LIMIT removed — monthly subscription has no daily limit (confirmed 2026-03-06)
 15  # Enable premium proxy for geo-targeting (required for multi-country support)
 16  ZENROWS_PREMIUM=false
 17  # ZenRows subscription cost for status cost display (flat monthly, not per-request)
 18  ZENROWS_MONTHLY_COST=69.99
 19  # ZenRows billing cycle day-of-month (billing date each month)
 20  ZENROWS_BILLING_DAY=3
 21  # Countries needing extended ZenRows timeouts (comma-separated ISO codes)
 22  # Known slow: JP (Japan), ID (Indonesia) — add others as observed via logs
 23  ZENROWS_SLOW_COUNTRIES=JP,ID
 24  ZENROWS_SLOW_TIMEOUT=300000
 25  
 26  # ─── Claude Model Tiers ──────────────────────────────────────────────────────
 27  # Update these to upgrade all Claude usage at once (OpenRouter format)
 28  CLAUDE_SONNET_MODEL=anthropic/claude-sonnet-4-6
 29  CLAUDE_HAIKU_MODEL=anthropic/claude-haiku-4-5
 30  CLAUDE_OPUS_MODEL=anthropic/claude-opus-4
 31  
 32  # ─── LLM Model Configuration ────────────────────────────────────────────────
 33  # OpenRouter format: provider/model-name (works with both OpenRouter and Anthropic)
 34  SCORING_MODEL=openai/gpt-4o-mini           # Initial AI vision scoring of screenshots
 35  PROPOSAL_MODEL=anthropic/claude-haiku-4-5  # Generate personalised proposals
 36  POLISH_MODEL=google/gemini-2.0-flash-001  # Polish/grammar-check proposals (cheap, fast)
 37  ENRICHMENT_MODEL=openai/gpt-4o-mini        # Extract contact info from web pages
 38  VISION_MODEL=openai/gpt-4o-mini            # General-purpose vision model
 39  CLASSIFICATION_MODEL=anthropic/claude-haiku-4-5  # Reply classification (fast, cheap)
 40  AUDIT_REPORT_MODEL=anthropic/claude-opus-4  # Premium audit reports (Opus + extended thinking)
 41  
 42  # Sage AI Auto-Fix (optional — only needed for automated quality fixes)
 43  #SAGE_AUTOFIX_MODEL=claude-sonnet-4-6
 44  #SAGE_AUTOFIX_BRANCH=sage-autofix
 45  
 46  # ─── OpenRouter Credit Monitoring ────────────────────────────────────────────
 47  # Alert threshold in USD (warning when low, critical when exhausted)
 48  OPENROUTER_CREDIT_THRESHOLD=10.0
 49  
 50  # ─── LLM Cost Guardrails ────────────────────────────────────────────────────
 51  # Daily spending cap in USD (hard limit, blocks calls when exceeded)
 52  LLM_DAILY_BUDGET=50
 53  # Hourly spending alert threshold in USD (warning only, does not block)
 54  LLM_HOURLY_ALERT=10
 55  
 56  # ─── Scoring & Proposals ────────────────────────────────────────────────────
 57  # Sites scoring at or below this are eligible for proposals
 58  # B (82) and below: B- (70-81), C (50-69), D (30-49), E (0-29)
 59  LOW_SCORE_CUTOFF=82
 60  
 61  # Only mention competitor score if it's at least this many points higher
 62  COMPETITOR_SCORE_THRESHOLD=10
 63  
 64  # 'true' = LLM proposals ($0.18/site via OpenRouter), 'false' = template-based (zero cost)
 65  USE_LLM_PROPOSALS=true
 66  
 67  # Maximum related keywords to fetch per seed keyword (DataForSEO)
 68  KEYWORD_EXPANSION_LIMIT=50
 69  
 70  # ─── Vision & Screenshot Configuration ──────────────────────────────────────
 71  # true:  Full pipeline with screenshots + vision analysis → assets → scoring → rescoring → enrich
 72  #        ⚠️  INCURS LLM API COSTS: ~$0.030/site via OpenRouter (outside Claude Max subscription)
 73  # false: HTML-only mode, no screenshots, skip rescoring (83% cost savings — recommended)
 74  ENABLE_VISION=true
 75  
 76  # LLM Scoring: false = use programmatic rule-based scoring (no API cost)
 77  # true = use LLM scoring via OpenRouter (GPT-4o-mini).
 78  # Claude Max orchestrator handles the zero-cost path separately.
 79  # Default: true (LLM scoring enabled)
 80  ENABLE_LLM_SCORING=true
 81  
 82  # LLM Enrichment: false = regex-only contact extraction from HTML (no API cost)
 83  # true = use LLM for location/business name extraction via OpenRouter.
 84  # Claude Max orchestrator handles the zero-cost path separately.
 85  # Default: true (LLM enrichment enabled)
 86  ENABLE_ENRICHMENT_LLM=true
 87  
 88  # DEPRECATED: Use ENABLE_VISION instead (ignored if ENABLE_VISION is set)
 89  # USE_COMPUTER_VISION_SCORING=true
 90  # USE_COMPUTER_VISION_RESCORING=true
 91  # USE_COMPUTER_VISION_ENRICHMENT=true
 92  # ENABLE_SCREENSHOT_CAPTURE=true
 93  
 94  # ─── API Rate Limits ────────────────────────────────────────────────────────
 95  # Throughput set safely below vendor and regulatory limits.
 96  # Twilio (long code) is 30% and Resend 25% below their enforced limits; ZeroBounce is set well below (40 of ~100 req/sec); others are ~3% below.
 97  # See src/utils/rate-limiter.js for where these are applied.
 98  
 99  # OpenRouter: 200 RPM paid plan; 20 RPM free tier
100  OPENROUTER_REQUESTS_PER_MINUTE=194
101  OPENROUTER_MAX_CONCURRENT=5
102  
103  # Twilio SMS: Long code = 1 SMS/sec; Toll-free = 3 SMS/sec; Short code = 100 SMS/sec
104  # TCPA/A2P compliance enforced separately in compliance.js
105  TWILIO_REQUESTS_PER_SECOND=0.7
106  TWILIO_MAX_CONCURRENT=1
107  # Pre-tracking SMS spend not recorded in DB — added to lifetime cost estimate in npm run status
108  # Set to actual spend before tracking began (default $7.30 = Feb spend before tracking started)
109  TWILIO_COST_OFFSET_USD=7.30
110  
111  # Resend: docs state 10 req/sec for all paid plans, but this plan enforces 2 req/sec
112  RESEND_REQUESTS_PER_SECOND=1.5  # actual plan limit: 2 req/sec (docs say 10 but plan enforces 2)
113  RESEND_MAX_CONCURRENT=1
114  
115  # DataForSEO: 2,000 req/min; 100 concurrent
116  DATAFORSEO_REQUESTS_PER_MINUTE=1940
117  DATAFORSEO_MAX_CONCURRENT=97
118  
119  # ZeroBounce email validation: ~100 req/sec vendor limit
120  # Set ZEROBOUNCE_ENABLED=false to skip validation (e.g. during testing)
121  # API key goes in .env.secrets — see ZEROBOUNCE_API_KEY entry there
122  ZEROBOUNCE_ENABLED=true
123  ZEROBOUNCE_REQUESTS_PER_SECOND=40
124  ZEROBOUNCE_MAX_CONCURRENT=5
125  ZEROBOUNCE_CACHE_TTL_DAYS=90
126  
127  # ─── Brand / White-Label ────────────────────────────────────────────────────
128  # Used across emails, PDFs, LLM prompts, URL allowlists, and HTTP-Referer headers
129  BRAND_NAME=Audit&Fix
130  BRAND_DOMAIN=auditandfix.com
131  BRAND_URL=https://auditandfix.com
132  
133  # ─── Persona / Operator Identity ────────────────────────────────────────────
134  # The person (or persona) behind outreach — used in proposals, emails, LLM prompts
135  PERSONA_NAME=Marcus Webb
136  PERSONA_FIRST_NAME=Marcus
137  
138  # ─── Sender Identity ────────────────────────────────────────────────────────
139  SENDER_NAME=John Smith
140  SENDER_EMAIL=john@yourcompany.com
141  SENDER_PHONE=+1234567890
142  SENDER_COMPANY=Your Company Name
143  EMAIL_SIGNATURE=Best regards,\nJohn Smith\nYour Company Name
144  
145  # CAN-SPAM Physical Address (required for commercial emails to US, CA, AU, NZ, UK, EU)
146  # Format: Street address, City, State/Province, Postal Code, Country
147  CAN_SPAM_PHYSICAL_ADDRESS=
148  
149  # ─── Tracking & Unsubscribe ─────────────────────────────────────────────────
150  UNSUBSCRIBE_BASE_URL=https://yourdomain.com/unsubscribe
151  
152  # PayPal checkout branding — should match BRAND_NAME
153  PAYPAL_BRAND_NAME=Audit&Fix
154  BASE_URL=http://localhost:3000
155  DEFAULT_PAYER_EMAIL=customer@example.com
156  
157  # ─── Autoresponder ───────────────────────────────────────────────────────────
158  # LLM-powered auto-reply to inbound SMS/email messages (Claude Opus via OpenRouter)
159  # true = auto-send replies without human approval; false = disable autoresponder entirely
160  AUTORESPONDER_ENABLED=true
161  
162  # ─── Sales Page ──────────────────────────────────────────────────────────────
163  # Cloudflare Worker — bridges PHP sales page with 333Method
164  API_WORKER_URL=https://auditandfix-api.auditandfix.workers.dev
165  API_WORKER_SECRET=your-shared-secret
166  # BRAND_URL (defined above) used to build /o/{site_id} short order links in SMS replies
167  
168  # PayPal live credentials (set in PHP server environment, not here)
169  # PAYPAL_MODE=live
170  # PAYPAL_CLIENT_ID=your-live-client-id
171  # PAYPAL_CLIENT_SECRET=your-live-client-secret
172  
173  # PayPal sandbox credentials — used when ?sandbox=1 param is present (E2E testing)
174  PAYPAL_SANDBOX_CLIENT_ID=your-sandbox-client-id
175  PAYPAL_SANDBOX_CLIENT_SECRET=your-sandbox-client-secret
176  PAYPAL_SANDBOX_BUYER_EMAIL=sandbox-buyer@personal.example.com
177  PAYPAL_SANDBOX_BUYER_PASSWORD=sandbox-buyer-password
178  
179  # PayPal webhook verification (add as Cloudflare Worker secret: wrangler secret put PAYPAL_WEBHOOK_ID)
180  PAYPAL_WEBHOOK_ID=your-webhook-id
181  
182  # PayPal worker internal auth secret (add as Cloudflare Worker secret: wrangler secret put PAYPAL_WORKER_SECRET)
183  # Used to authenticate GET/DELETE /paypal-events.json calls from local pipeline poller
184  PAYPAL_WORKER_SECRET=your-paypal-worker-secret
185  
186  # Resend webhook Worker URL (deployed via wrangler)
187  # Prod:  https://resend-webhook-worker.auditandfix.workers.dev
188  # Register in Resend dashboard → Settings → Webhooks
189  # Resend webhook verification (add as Cloudflare Worker secret: wrangler secret put RESEND_WEBHOOK_SECRET)
190  # Format: whsec_... (from Resend dashboard → Webhooks → Signing Secret)
191  RESEND_WEBHOOK_SECRET=whsec_your-signing-secret
192  
193  # Resend worker internal auth secret (add as Cloudflare Worker secret: wrangler secret put RESEND_WORKER_SECRET)
194  # Used to authenticate GET/POST/DELETE /email-events.json calls from local pipeline
195  RESEND_WORKER_SECRET=your-resend-worker-secret
196  
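197  # Sender email for purchase confirmation and report delivery. NOTE(review): SENDER_EMAIL is also set under "Sender Identity" earlier in this file; with a duplicate key only one value is kept (typically the later one) — confirm against src/utils/load-env.js and remove or rename one of them.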
198  SENDER_EMAIL=reports@auditandfix.com
199  
200  # Legal contact email (shown in Privacy Policy, Terms, Impressum, Cookie Policy)
201  LEGALS_EMAIL=legals@auditandfix.com
202  
203  # Business postal address (shown in Privacy Policy, Terms, Impressum)
204  # Format: Street, Suburb NSW POSTCODE, Australia
205  BUSINESS_ADDRESS=NSW, Australia
206  
207  # Legal operator name (shown in Impressum as "Verantwortliche Person / Responsible Person")
208  OPERATOR_NAME=Your Name
209  
210  # ─── Database & Storage ─────────────────────────────────────────────────────
211  DATABASE_PATH=./db/sites.db
212  SCREENSHOT_BASE_PATH=./screenshots
213  
214  # ─── Cron Circuit Breaker ───────────────────────────────────────────────────
215  # Fallback if not set in settings table. Dashboard can toggle dynamically.
216  # Set to 'false' to disable all cron jobs (useful for maintenance/debugging).
217  CRON_CIRCUIT_BREAKER_ENABLED=true
218  
219  # ─── Pipeline Stage Control ─────────────────────────────────────────────────
220  # Skip stages (comma-separated): serps, assets, enrich, proposals, outreach, replies
221  # Note: scoring and rescoring are orchestrator-only — not valid pipeline stage names.
222  # API quota stages (serps, etc) self-manage via circuit breakers — don't list them here.
223  # Restart service after changing: systemctl --user restart 333method-pipeline
224  SKIP_STAGES=
225  
226  # Skip outreach methods (comma-separated): sms, email, form, x, linkedin
227  OUTREACH_SKIP_METHODS=
228  
229  # Block outreach to specific countries (comma-separated ISO codes)
230  # Use this to exclude countries with unresolved compliance requirements.
231  #
232  # GDPR countries (need Legitimate Interests Assessment on file):
233  #   DE,FR,IT,ES,NL,BE,AT,SE,DK,NO,IE,PL,PT,FI,CZ,HU,RO,BG,HR,SK,SI,LT,LV,EE,LU,MT,CY,GR
234  # UK (UK GDPR / ICO — needs separate LIA under UK GDPR): GB
235  #
236  # Remove countries from this list once their compliance requirements are met.
237  # See docs/09-business/auditandfix-business-plan.md Risk Management section for rationale.
238  OUTREACH_BLOCKED_COUNTRIES=DE,FR,IT,ES,NL,BE,AT,SE,DK,NO,IE,PL,GB
239  
240  # Block SMS outreach to specific countries (comma-separated ISO codes)
241  # Separate from OUTREACH_BLOCKED_COUNTRIES which blocks ALL channels.
242  # TCPA: US/CA cold SMS requires prior express written consent for telemarketing.
243  # Facebook v. Duguid (2021) defense may apply but untested for our use case.
244  # Email/form outreach to US/CA is NOT blocked.
245  OUTREACH_BLOCKED_SMS_COUNTRIES=US,CA
246  
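247  # Sites processed per cycle, per stage. Default: 5. Increase to clear backlogs (50–100). NOTE(review): the example value below (200) is above that suggested range — confirm it is intended.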
248  PIPELINE_BATCH_MAX_SIZE=200
249  # PIPELINE_BATCH_MIN_THRESHOLD=5  # skip stage if backlog below this (avoids spin-up overhead)
250  
251  # Delay between pipeline stage cycles in milliseconds (default: 1000)
252  PIPELINE_CYCLE_DELAY_MS=1000
253  
254  # Interval for checking if pipeline is manually paused in milliseconds (default: 5000)
255  PIPELINE_PAUSE_CHECK_MS=5000
256  
257  # ─── Stage Concurrency ──────────────────────────────────────────────────────
258  # Max parallel operations per stage. Can be adjusted live — takes effect after each site.
259  # Browser stages use adaptive scaling (reduces automatically under high load).
260  BROWSER_CONCURRENCY=5          # Max concurrent browser instances for Assets stage
261  ENRICHMENT_CONCURRENCY=5       # Max concurrent browser instances for Enrich stage
262  SCORING_CONCURRENCY=5          # Max concurrent API calls for Scoring stage
263  
264  # Screen-aware throttling: tighter thresholds when physical monitor is on (user present)
265  # Screen OFF (AFK): ease=0.4, max=0.8  →  Screen ON (user present): defaults below
266  SCREEN_ON_EASE_LOAD=0.2        # Normalised load below which full concurrency is used
267  SCREEN_ON_MAX_LOAD=0.5         # Normalised load above which minimum concurrency is used
268  
269  # CPU gate for browser loop: pause all browser work if instantaneous CPU exceeds this
270  # Uses real-time 200ms CPU samples — much faster than loadavg (30-60s lag)
271  BROWSER_CPU_GATE=0.80
272  
273  # ─── Browser Configuration ──────────────────────────────────────────────────
274  # Optional: Override Chromium path (auto-detected if not set)
275  # Use ./scripts/chromium-nice to run chromium at low priority (nice -n 19)
276  # CHROMIUM_PATH=./scripts/chromium-nice
277  
278  # NixOS: Set custom browser path if Playwright can't write to Nix store
279  # PLAYWRIGHT_BROWSERS_PATH=/home/username/.cache/ms-playwright
280  
281  # Separate user data directory prevents lock file conflicts with daily browser
282  # PLAYWRIGHT_USER_DATA_DIR=./.playwright-userdata
283  
284  # ─── CAPTCHA Solving (NopeCHA) ───────────────────────────────────────────────
285  # Optional: auto-solve reCAPTCHA v2, hCaptcha, and Cloudflare Turnstile on contact forms
286  # CAPTCHA solving providers — fastest provider used first (benchmarked every 30min via cron)
287  # If not set, CAPTCHAs require manual solving by the operator
288  # NopeCHA: https://nopecha.com — 100 solves/day free, $5/mth for 2000/day
289  # NOPECHA_API_KEY=your-nopecha-api-key
290  # NOPECHA_API_KEY_2=your-second-nopecha-api-key  # round-robins with primary key
291  # CapMonster: https://capmonster.cloud — backup provider, cheaper than anti-captcha
292  # CAPMONSTER_API_KEY=your-capmonster-api-key
293  
294  # ─── Browser Profiles (X & LinkedIn persistent sessions) ─────────────────────
295  # Profiles store cookies/storage so you don't need to re-login every run
296  BROWSER_PROFILES_DIR=./.browser-profiles
297  X_PROFILE_COUNT=3
298  LINKEDIN_PROFILE_COUNT=3
299  
300  # ─── Timezone & Locale ──────────────────────────────────────────────────────
301  # Timezone should match your IP location to avoid fingerprint inconsistencies
302  TIMEZONE=Australia/Sydney
303  # Examples: en-AU,en;q=0.9 (Australian), en-US,en;q=0.9 (American)
304  ACCEPT_LANGUAGE=en-AU,en;q=0.9
305  
306  # ─── X (Twitter) Automation XPath Selectors ──────────────────────────────────
307  # These may change over time as X updates their UI
308  X_LOGIN_BUTTON_XPATH=//div[contains(@style, "position: absolute; bottom: 0px; width: 100%")]//span[text()="Log in"]
309  X_PROFILE_LINK_XPATH=//a[@aria-label="Profile"]
310  X_USERNAME_DIV_XPATH=//div[@data-testid="UserName"]
311  X_MESSAGE_BUTTON_XPATH=//button[@aria-label="Message"]
312  
313  # ─── Free Website Scanner (Inbound Funnel) ───────────────────────────────────
314  # Scoring runs in the Cloudflare Worker (auditandfix-api) — no local server needed.
315  # The NixOS poll daemon (npm run score-api) pulls scans from Worker KV into SQLite.
316  #
317  # API_WORKER_URL and API_WORKER_SECRET are already defined above
318  # (shared with purchase queue — same Worker, same secret).
319  #
320  # Also set in Hostinger .htaccess (same API_WORKER_URL value):
321  #   SetEnv API_WORKER_URL https://auditandfix-api.auditandfix.workers.dev
322  
323  # ─── Dashboard (optional) ───────────────────────────────────────────────────
324  DASHBOARD_PORT=8501
325  DASHBOARD_REFRESH_INTERVAL=60  # Auto-refresh interval in seconds (0 = manual)
326  DASHBOARD_CACHE_TTL=300         # Query cache TTL in seconds
327  DASHBOARD_PAGE_SIZE=50          # Pagination size for tables
328  
329  # ─── Debug ───────────────────────────────────────────────────────────────────
330  # Uncomment to enable verbose debug logging in pipeline and agent loggers
331  # DEBUG=true
332  
333  # ─── End-to-End Pipeline Test ────────────────────────────────────────────────
334  # See .env.secrets for TEST_E2E_URL, TEST_E2E_PASSWORD
335  # No Resend/Twilio magic test addresses need configuration — they're hardcoded:
336  #   Resend: delivered@resend.dev, bounced@resend.dev, complained@resend.dev
337  #   Twilio: +15005550006 (valid), +15005550001 (invalid), +15005550007 (no SMS)
338  TEST_E2E_KEYWORD=site:yoursite.com/terms
339  
340  # ─── Loopback SMS E2E Test (npm run test:e2e:sms) ────────────────────────────
341  # Sends a real SMS from one Twilio number to a second Twilio number to verify the full send→receive loop.
342  # Throttled to once per 24 hours (gate file: /tmp/sms-loopback-gate.json).
343  # Override throttle: SMS_LOOPBACK_FORCE=1 npm run test:e2e:sms
344  #
345  # TWILIO_LOOPBACK_FROM — Twilio number that sends the test SMS (e.g. AU: +61468089949)
346  # TWILIO_LOOPBACK_TO   — different Twilio number that receives it (e.g. CA: +18254794242)
347  # Note: FROM and TO must be different — Twilio rejects sending from a number to itself
348  TWILIO_LOOPBACK_FROM=+61468089949
349  TWILIO_LOOPBACK_TO=+18254794242
350  # Inbound email integration tests — point to test subdomain inbox to isolate from prod
351  # TEST_EMAIL_EVENTS_WORKER_URL is auto-applied during inbound E2E tests (reverts when done)
352  TEST_INBOUND_EMAIL=marcus@test.auditandfix.com
353  TEST_EMAIL_EVENTS_WORKER_URL=https://resend-webhook-worker-test.auditandfix.workers.dev