Cradicle Explorer

/ .env.example
.env.example
  1  # ─────────────────────────────────────────────────────────────────────────────
  2  # .env — Core pipeline and operational configuration
  3  # ─────────────────────────────────────────────────────────────────────────────
  4  # This is the main config file. API keys live in .env.secrets, agent system
  5  # config lives in .env.agents. All three are loaded by src/utils/load-env.js.
  6  #
  7  # Values set in .env take priority over .env.secrets and .env.agents.
  8  
  9  # ─── ZenRows Configuration ──────────────────────────────────────────────────
 10  # Vendor limits: Free=5 concurrent/1,000 req/day; Developer=10/unlimited; Business=100/unlimited
 11  # Default: 19 (5% below a 20-concurrent baseline; NOTE(review): line above lists Business=100 — confirm plan limit) — increase for higher-tier plans
 12  ZENROWS_CONCURRENCY=19
 13  # ZENROWS_DAILY_LIMIT (former daily request quota, e.g. 1,000/day on the Free plan; 0 disabled the cap)
 14  # has been removed — the monthly subscription has no daily limit (confirmed 2026-03-06)
 15  # Enable premium proxy for geo-targeting (required for multi-country support)
 16  ZENROWS_PREMIUM=false
 17  # ZenRows subscription cost for status cost display (flat monthly, not per-request)
 18  ZENROWS_MONTHLY_COST=69.99
 19  # ZenRows billing cycle day-of-month (billing date each month)
 20  ZENROWS_BILLING_DAY=3
 21  # Countries needing extended ZenRows timeouts (comma-separated ISO codes)
 22  # Known slow: JP (Japan), ID (Indonesia) — add others as observed via logs
 23  ZENROWS_SLOW_COUNTRIES=JP,ID
 24  ZENROWS_SLOW_TIMEOUT=300000
 25  
 26  # ─── Claude Model Tiers ──────────────────────────────────────────────────────
 27  # Update these to upgrade all Claude usage at once (OpenRouter format)
 28  CLAUDE_SONNET_MODEL=anthropic/claude-sonnet-4-6
 29  CLAUDE_HAIKU_MODEL=anthropic/claude-haiku-4-5
 30  CLAUDE_OPUS_MODEL=anthropic/claude-opus-4
 31  
 32  # ─── LLM Model Configuration ────────────────────────────────────────────────
 33  # OpenRouter format: provider/model-name (works with both OpenRouter and Anthropic)
 34  SCORING_MODEL=openai/gpt-4o-mini           # Initial AI vision scoring of screenshots
 35  PROPOSAL_MODEL=anthropic/claude-haiku-4-5  # Generate personalised proposals
 36  POLISH_MODEL=google/gemini-2.0-flash-001  # Polish/grammar-check proposals (cheap, fast)
 37  ENRICHMENT_MODEL=openai/gpt-4o-mini        # Extract contact info from web pages
 38  VISION_MODEL=openai/gpt-4o-mini            # General-purpose vision model
 39  CLASSIFICATION_MODEL=anthropic/claude-haiku-4-5  # Reply classification (fast, cheap)
 40  AUDIT_REPORT_MODEL=anthropic/claude-opus-4  # Premium audit reports (Opus + extended thinking)
 41  
 42  # Sage AI Auto-Fix (optional — only needed for automated quality fixes)
 43  #SAGE_AUTOFIX_MODEL=claude-sonnet-4-6
 44  #SAGE_AUTOFIX_BRANCH=sage-autofix
 45  
 46  # ─── OpenRouter Credit Monitoring ────────────────────────────────────────────
 47  # Alert threshold in USD (warning when low, critical when exhausted)
 48  OPENROUTER_CREDIT_THRESHOLD=10.0
 49  
 50  # ─── LLM Cost Guardrails ────────────────────────────────────────────────────
 51  # Daily spending cap in USD (hard limit, blocks calls when exceeded)
 52  LLM_DAILY_BUDGET=50
 53  # Hourly spending alert threshold in USD (warning only, does not block)
 54  LLM_HOURLY_ALERT=10
 55  
 56  # ─── Scoring & Proposals ────────────────────────────────────────────────────
 57  # Sites scoring at or below this are eligible for proposals
 58  # B (82) and below: B- (70-81), C (50-69), D (30-49), E (0-29)
 59  LOW_SCORE_CUTOFF=82
 60  
 61  # Only mention competitor score if it's at least this many points higher
 62  COMPETITOR_SCORE_THRESHOLD=10
 63  
 64  # 'true' = LLM proposals ($0.18/site via OpenRouter), 'false' = template-based (zero cost)
 65  USE_LLM_PROPOSALS=true
 66  
 67  # Maximum related keywords to fetch per seed keyword (DataForSEO)
 68  KEYWORD_EXPANSION_LIMIT=50
 69  
 70  # ─── Vision & Screenshot Configuration ──────────────────────────────────────
 71  # true:  Full pipeline with screenshots + vision analysis → assets → scoring → rescoring → enrich
 72  #        ⚠️  INCURS LLM API COSTS: ~$0.030/site via OpenRouter (outside Claude Max subscription)
 73  # false: HTML-only mode, no screenshots, skip rescoring (83% cost savings — recommended)
 74  ENABLE_VISION=true
 75  
 76  # LLM Scoring: false = use programmatic rule-based scoring (no API cost)
 77  # true = use LLM scoring via OpenRouter (GPT-4o-mini).
 78  # Claude Max orchestrator handles the zero-cost path separately.
 79  # Default: true (LLM scoring enabled)
 80  ENABLE_LLM_SCORING=true
 81  
 82  # LLM Enrichment: false = regex-only contact extraction from HTML (no API cost)
 83  # true = use LLM for location/business name extraction via OpenRouter.
 84  # Claude Max orchestrator handles the zero-cost path separately.
 85  # Default: true (LLM enrichment enabled)
 86  ENABLE_ENRICHMENT_LLM=true
 87  
 88  # DEPRECATED: Use ENABLE_VISION instead (ignored if ENABLE_VISION is set)
 89  # USE_COMPUTER_VISION_SCORING=true
 90  # USE_COMPUTER_VISION_RESCORING=true
 91  # USE_COMPUTER_VISION_ENRICHMENT=true
 92  # ENABLE_SCREENSHOT_CAPTURE=true
 93  
 94  # ─── API Rate Limits ────────────────────────────────────────────────────────
 95  # Throughput set safely below vendor and regulatory limits.
 96  # Twilio is 30% and Resend 25% below their enforced limits; ZeroBounce is ~60% below; others are ~3% below.
 97  # See src/utils/rate-limiter.js for where these are applied.
 98  
 99  # OpenRouter: 200 RPM paid plan; 20 RPM free tier
100  OPENROUTER_REQUESTS_PER_MINUTE=194
101  OPENROUTER_MAX_CONCURRENT=5
102  
103  # Twilio SMS: Long code = 1 SMS/sec; Toll-free = 3 SMS/sec; Short code = 100 SMS/sec
104  # TCPA/A2P compliance enforced separately in compliance.js
105  TWILIO_REQUESTS_PER_SECOND=0.7
106  TWILIO_MAX_CONCURRENT=1
107  # Pre-tracking SMS spend not recorded in DB — added to lifetime cost estimate in npm run status
108  # Set to actual spend before tracking began (default $7.30 = Feb spend before tracking started)
109  TWILIO_COST_OFFSET_USD=7.30
110  
111  # Resend: docs state 10 req/sec on all paid plans, but the current plan enforces 2 req/sec
112  RESEND_REQUESTS_PER_SECOND=1.5  # actual plan limit: 2 req/sec (docs say 10 but plan enforces 2)
113  RESEND_MAX_CONCURRENT=1
114  
115  # DataForSEO: 2,000 req/min; 100 concurrent
116  DATAFORSEO_REQUESTS_PER_MINUTE=1940
117  DATAFORSEO_MAX_CONCURRENT=97
118  
119  # ZeroBounce email validation: ~100 req/sec vendor limit
120  # Set ZEROBOUNCE_ENABLED=false to skip validation (e.g. during testing)
121  # API key goes in .env.secrets — see ZEROBOUNCE_API_KEY entry there
122  ZEROBOUNCE_ENABLED=true
123  ZEROBOUNCE_REQUESTS_PER_SECOND=40
124  ZEROBOUNCE_MAX_CONCURRENT=5
125  ZEROBOUNCE_CACHE_TTL_DAYS=90
126  
127  # ─── Sender Identity ────────────────────────────────────────────────────────
128  SENDER_NAME=John Smith
129  SENDER_EMAIL=john@yourcompany.com
130  SENDER_PHONE=+1234567890
131  SENDER_COMPANY=Your Company Name
132  EMAIL_SIGNATURE=Best regards,\nJohn Smith\nYour Company Name
133  
134  # CAN-SPAM Physical Address (required for commercial emails to US, CA, AU, NZ, UK, EU)
135  # Format: Street address, City, State/Province, Postal Code, Country
136  CAN_SPAM_PHYSICAL_ADDRESS=
137  
138  # ─── Tracking & Unsubscribe ─────────────────────────────────────────────────
139  UNSUBSCRIBE_BASE_URL=https://yourdomain.com/unsubscribe
140  
141  PAYPAL_BRAND_NAME=Audit&Fix
142  BASE_URL=http://localhost:3000
143  DEFAULT_PAYER_EMAIL=customer@example.com
144  
145  # ─── Autoresponder ───────────────────────────────────────────────────────────
146  # LLM-powered auto-reply to inbound SMS/email messages (Claude Opus via OpenRouter)
147  # true = auto-send replies without human approval; false = disable autoresponder entirely
148  AUTORESPONDER_ENABLED=true
149  
150  # ─── Audit&Fix Sales Page ────────────────────────────────────────────────────
151  # Cloudflare Worker (auditandfix-api) — bridges PHP sales page with 333Method
152  AUDITANDFIX_WORKER_URL=https://auditandfix-api.auditandfix.workers.dev
153  AUDITANDFIX_WORKER_SECRET=your-shared-secret
154  # auditandfix.com public URL (used to build /o/{site_id} short order links in SMS replies)
155  AUDITANDFIX_URL=https://auditandfix.com
156  
157  # PayPal live credentials (set in PHP server environment, not here)
158  # PAYPAL_MODE=live
159  # PAYPAL_CLIENT_ID=your-live-client-id
160  # PAYPAL_CLIENT_SECRET=your-live-client-secret
161  
162  # PayPal sandbox credentials — used when ?sandbox=1 param is present (E2E testing)
163  PAYPAL_SANDBOX_CLIENT_ID=your-sandbox-client-id
164  PAYPAL_SANDBOX_CLIENT_SECRET=your-sandbox-client-secret
165  PAYPAL_SANDBOX_BUYER_EMAIL=sandbox-buyer@personal.example.com
166  PAYPAL_SANDBOX_BUYER_PASSWORD=sandbox-buyer-password
167  
168  # PayPal webhook verification (add as Cloudflare Worker secret: wrangler secret put PAYPAL_WEBHOOK_ID)
169  PAYPAL_WEBHOOK_ID=your-webhook-id
170  
171  # PayPal worker internal auth secret (add as Cloudflare Worker secret: wrangler secret put PAYPAL_WORKER_SECRET)
172  # Used to authenticate GET/DELETE /paypal-events.json calls from local pipeline poller
173  PAYPAL_WORKER_SECRET=your-paypal-worker-secret
174  
175  # Resend webhook Worker URL (deployed via wrangler)
176  # Prod:  https://resend-webhook-worker.auditandfix.workers.dev
177  # Register in Resend dashboard → Settings → Webhooks
178  # Resend webhook verification (add as Cloudflare Worker secret: wrangler secret put RESEND_WEBHOOK_SECRET)
179  # Format: whsec_... (from Resend dashboard → Webhooks → Signing Secret)
180  RESEND_WEBHOOK_SECRET=whsec_your-signing-secret
181  
182  # Resend worker internal auth secret (add as Cloudflare Worker secret: wrangler secret put RESEND_WORKER_SECRET)
183  # Used to authenticate GET/POST/DELETE /email-events.json calls from local pipeline
184  RESEND_WORKER_SECRET=your-resend-worker-secret
185  
186  # Sender email for purchase confirmation and report delivery
187  AUDITANDFIX_SENDER_EMAIL=reports@auditandfix.com
188  
189  # Legal contact email (shown in Privacy Policy, Terms, Impressum, Cookie Policy)
190  LEGALS_EMAIL=legals@auditandfix.com
191  
192  # Business postal address (shown in Privacy Policy, Terms, Impressum)
193  # Format: Street, Suburb NSW POSTCODE, Australia
194  BUSINESS_ADDRESS=NSW, Australia
195  
196  # Legal operator name (shown in Impressum as "Verantwortliche Person / Responsible Person")
197  OPERATOR_NAME=Your Name
198  
199  # ─── Database & Storage ─────────────────────────────────────────────────────
200  DATABASE_PATH=./db/sites.db
201  SCREENSHOT_BASE_PATH=./screenshots
202  
203  # ─── Cron Circuit Breaker ───────────────────────────────────────────────────
204  # Fallback if not set in settings table. Dashboard can toggle dynamically.
205  # Set to 'false' to disable all cron jobs (useful for maintenance/debugging).
206  CRON_CIRCUIT_BREAKER_ENABLED=true
207  
208  # ─── Pipeline Stage Control ─────────────────────────────────────────────────
209  # Skip stages (comma-separated): serps, assets, enrich, proposals, outreach, replies
210  # Note: scoring and rescoring are orchestrator-only — not valid pipeline stage names.
211  # API quota stages (serps, etc) self-manage via circuit breakers — don't list them here.
212  # Restart service after changing: systemctl --user restart 333method-pipeline
213  SKIP_STAGES=
214  
215  # Skip outreach methods (comma-separated): sms, email, form, x, linkedin
216  OUTREACH_SKIP_METHODS=
217  
218  # Block outreach to specific countries (comma-separated ISO codes)
219  # Use this to exclude countries with unresolved compliance requirements.
220  #
221  # GDPR countries (need Legitimate Interests Assessment on file):
222  #   DE,FR,IT,ES,NL,BE,AT,SE,DK,NO,IE,PL,PT,FI,CZ,HU,RO,BG,HR,SK,SI,LT,LV,EE,LU,MT,CY,GR
223  # UK (UK GDPR / ICO — needs separate LIA under UK GDPR): GB
224  #
225  # Remove countries from this list once their compliance requirements are met.
226  # See docs/09-business/auditandfix-business-plan.md Risk Management section for rationale.
227  OUTREACH_BLOCKED_COUNTRIES=DE,FR,IT,ES,NL,BE,AT,SE,DK,NO,IE,PL,GB
228  
229  # Block SMS outreach to specific countries (comma-separated ISO codes)
230  # Separate from OUTREACH_BLOCKED_COUNTRIES which blocks ALL channels.
231  # TCPA: US/CA cold SMS requires prior express written consent for telemarketing.
232  # Facebook v. Duguid (2021) defense may apply but untested for our use case.
233  # Email/form outreach to US/CA is NOT blocked.
234  OUTREACH_BLOCKED_SMS_COUNTRIES=US,CA
235  
236  # Sites processed per cycle, per stage. Default when unset: 5. Increase to clear backlogs (e.g. 50–100; this example uses 200).
237  PIPELINE_BATCH_MAX_SIZE=200
238  # PIPELINE_BATCH_MIN_THRESHOLD=5  # skip stage if backlog below this (avoids spin-up overhead)
239  
240  # Delay between pipeline stage cycles in milliseconds (default: 1000)
241  PIPELINE_CYCLE_DELAY_MS=1000
242  
243  # Interval for checking if pipeline is manually paused in milliseconds (default: 5000)
244  PIPELINE_PAUSE_CHECK_MS=5000
245  
246  # ─── Stage Concurrency ──────────────────────────────────────────────────────
247  # Max parallel operations per stage. Can be adjusted live — takes effect after each site.
248  # Browser stages use adaptive scaling (reduces automatically under high load).
249  BROWSER_CONCURRENCY=5          # Max concurrent browser instances for Assets stage
250  ENRICHMENT_CONCURRENCY=5       # Max concurrent browser instances for Enrich stage
251  SCORING_CONCURRENCY=5          # Max concurrent API calls for Scoring stage
252  
253  # Screen-aware throttling: tighter thresholds when physical monitor is on (user present)
254  # Screen OFF (AFK): ease=0.4, max=0.8  →  Screen ON (user present): defaults below
255  SCREEN_ON_EASE_LOAD=0.2        # Normalised load below which full concurrency is used
256  SCREEN_ON_MAX_LOAD=0.5         # Normalised load above which minimum concurrency is used
257  
258  # CPU gate for browser loop: pause all browser work if instantaneous CPU exceeds this
259  # Uses real-time 200ms CPU samples — much faster than loadavg (30-60s lag)
260  BROWSER_CPU_GATE=0.80
261  
262  # ─── Browser Configuration ──────────────────────────────────────────────────
263  # Optional: Override Chromium path (auto-detected if not set)
264  # Use ./scripts/chromium-nice to run chromium at low priority (nice -n 19)
265  # CHROMIUM_PATH=./scripts/chromium-nice
266  
267  # NixOS: Set custom browser path if Playwright can't write to Nix store
268  # PLAYWRIGHT_BROWSERS_PATH=/home/username/.cache/ms-playwright
269  
270  # Separate user data directory prevents lock file conflicts with daily browser
271  # PLAYWRIGHT_USER_DATA_DIR=./.playwright-userdata
272  
273  # ─── CAPTCHA Solving (NopeCHA) ───────────────────────────────────────────────
274  # Optional: auto-solve reCAPTCHA v2, hCaptcha, and Cloudflare Turnstile on contact forms
275  # CAPTCHA solving providers — fastest provider used first (benchmarked every 30min via cron)
276  # If no provider API key below is set, CAPTCHAs require manual solving by the operator
277  # NopeCHA: https://nopecha.com — 100 solves/day free, $5/mth for 2000/day
278  # NOPECHA_API_KEY=your-nopecha-api-key
279  # NOPECHA_API_KEY_2=your-second-nopecha-api-key  # round-robins with primary key
280  # CapMonster: https://capmonster.cloud — backup provider, cheaper than anti-captcha
281  # CAPMONSTER_API_KEY=your-capmonster-api-key
282  
283  # ─── Browser Profiles (X & LinkedIn persistent sessions) ─────────────────────
284  # Profiles store cookies/storage so you don't need to re-login every run
285  BROWSER_PROFILES_DIR=./.browser-profiles
286  X_PROFILE_COUNT=3
287  LINKEDIN_PROFILE_COUNT=3
288  
289  # ─── Timezone & Locale ──────────────────────────────────────────────────────
290  # Timezone should match your IP location to avoid fingerprint inconsistencies
291  TIMEZONE=Australia/Sydney
292  # Examples: en-AU,en;q=0.9 (Australian), en-US,en;q=0.9 (American)
293  ACCEPT_LANGUAGE=en-AU,en;q=0.9
294  
295  # ─── X (Twitter) Automation XPath Selectors ──────────────────────────────────
296  # These may change over time as X updates their UI
297  X_LOGIN_BUTTON_XPATH=//div[contains(@style, "position: absolute; bottom: 0px; width: 100%")]//span[text()="Log in"]
298  X_PROFILE_LINK_XPATH=//a[@aria-label="Profile"]
299  X_USERNAME_DIV_XPATH=//div[@data-testid="UserName"]
300  X_MESSAGE_BUTTON_XPATH=//button[@aria-label="Message"]
301  
302  # ─── Free Website Scanner (Inbound Funnel) ───────────────────────────────────
303  # Scoring runs in the Cloudflare Worker (auditandfix-api) — no local server needed.
304  # The NixOS poll daemon (npm run score-api) pulls scans from Worker KV into SQLite.
305  #
306  # AUDITANDFIX_WORKER_URL and AUDITANDFIX_WORKER_SECRET are already defined above
307  # (shared with purchase queue — same Worker, same secret).
308  #
309  # Also set in Hostinger .htaccess (same AUDITANDFIX_WORKER_URL value):
310  #   SetEnv AUDITANDFIX_WORKER_URL https://auditandfix-api.auditandfix.workers.dev
311  
312  # ─── Dashboard (optional) ───────────────────────────────────────────────────
313  DASHBOARD_PORT=8501
314  DASHBOARD_REFRESH_INTERVAL=60  # Auto-refresh interval in seconds (0 = manual)
315  DASHBOARD_CACHE_TTL=300         # Query cache TTL in seconds
316  DASHBOARD_PAGE_SIZE=50          # Pagination size for tables
317  
318  # ─── Debug ───────────────────────────────────────────────────────────────────
319  # Uncomment to enable verbose debug logging in pipeline and agent loggers
320  # DEBUG=true
321  
322  # ─── End-to-End Pipeline Test ────────────────────────────────────────────────
323  # See .env.secrets for TEST_E2E_URL, TEST_E2E_PASSWORD
324  # No Resend/Twilio magic test addresses need configuration — they're hardcoded:
325  #   Resend: delivered@resend.dev, bounced@resend.dev, complained@resend.dev
326  #   Twilio: +15005550006 (valid), +15005550001 (invalid), +15005550007 (no SMS)
327  TEST_E2E_KEYWORD=site:yoursite.com/terms
328  
329  # ─── Loopback SMS E2E Test (npm run test:e2e:sms) ────────────────────────────
330  # Sends a real SMS from one Twilio number to a second Twilio number to verify the full send→receive loop.
331  # Throttled to once per 24 hours (gate file: /tmp/sms-loopback-gate.json).
332  # Override throttle: SMS_LOOPBACK_FORCE=1 npm run test:e2e:sms
333  #
334  # TWILIO_LOOPBACK_FROM — Twilio number that sends the test SMS (e.g. AU: +61468089949)
335  # TWILIO_LOOPBACK_TO   — different Twilio number that receives it (e.g. CA: +18254794242)
336  # Note: FROM and TO must be different — Twilio rejects sending from a number to itself
337  TWILIO_LOOPBACK_FROM=+61468089949
338  TWILIO_LOOPBACK_TO=+18254794242
339  # Inbound email integration tests — point to test subdomain inbox to isolate from prod
340  # TEST_EMAIL_EVENTS_WORKER_URL is auto-applied during inbound E2E tests (reverts when done)
341  TEST_INBOUND_EMAIL=marcus@test.auditandfix.com
342  TEST_EMAIL_EVENTS_WORKER_URL=https://resend-webhook-worker-test.auditandfix.workers.dev