market-expansion.md
1 --- 2 title: 'Market Expansion' 3 category: 'business' 4 last_verified: '2026-02-15' 5 related_files: 6 - 'src/config/countries.js' 7 - 'src/scrape.js' 8 - 'src/process.js' 9 - 'src/poc.js' 10 - 'src/proposal-generator-v2.js' 11 tags: ['market', 'expansion', 'testing', 'database', 'api', 'ai', 'llm', 'email'] 12 status: 'current' 13 --- 14 15 # Multi-Country Support Implementation Plan 16 17 ## Overview 18 19 Expand the 333 Method to support multiple countries with region-specific Google searches, proper language/currency handling, and localized proposal generation. 20 21 ## Database Changes 22 23 ### 1. Add Region/Country Fields to Sites Table 24 25 ```sql 26 ALTER TABLE sites ADD COLUMN country_code TEXT; -- ISO 3166-1 alpha-2 (e.g., 'AU', 'US', 'NZ'); the United Kingdom is stored as 'UK' rather than ISO 'GB' 27 ALTER TABLE sites ADD COLUMN google_domain TEXT; -- e.g., 'google.com.au', 'google.co.uk' 28 ALTER TABLE sites ADD COLUMN language_code TEXT; -- ISO 639-1 (e.g., 'en', 'es', 'fr') 29 ALTER TABLE sites ADD COLUMN currency_code TEXT; -- ISO 4217 (e.g., 'AUD', 'USD', 'GBP') 30 CREATE INDEX idx_sites_country ON sites(country_code); 31 ``` 32 33 ### 2. Add Region/Country to Keywords Table 34 35 ```sql 36 ALTER TABLE keywords ADD COLUMN country_code TEXT DEFAULT 'AU'; 37 ALTER TABLE keywords ADD COLUMN google_domain TEXT DEFAULT 'google.com.au'; 38 ``` 39 40 ### 3. Migration Script 41 42 Create `db/migrations/add-multi-country-support.sql`: 43 44 - Add new columns with defaults 45 - Backfill existing rows (default to Australia: 'AU', 'google.com.au', 'en', 'AUD') 46 - Create indexes, including a UNIQUE index on keywords(keyword, country_code) — the `ON CONFLICT(keyword, country_code)` upsert in step 12 fails without a matching unique constraint 47 48 ## Country Configuration 49 50 ### 4. 
Create Country Registry (`src/config/countries.js`) 51 52 ```javascript 53 export const COUNTRIES = { 54 AU: { 55 code: 'AU', 56 name: 'Australia', 57 googleDomain: 'google.com.au', 58 language: 'en', 59 currency: 'AUD', 60 currencySymbol: '$', 61 dateFormat: 'DD/MM/YYYY', 62 phoneFormat: '+61', 63 commonCities: ['sydney', 'melbourne', 'brisbane', 'perth', 'adelaide'], 64 }, 65 US: { 66 code: 'US', 67 name: 'United States', 68 googleDomain: 'google.com', 69 language: 'en', 70 currency: 'USD', 71 currencySymbol: '$', 72 dateFormat: 'MM/DD/YYYY', 73 phoneFormat: '+1', 74 commonCities: ['new york', 'los angeles', 'chicago', 'houston', 'phoenix'], 75 }, 76 UK: { 77 code: 'UK', 78 name: 'United Kingdom', 79 googleDomain: 'google.co.uk', 80 language: 'en', 81 currency: 'GBP', 82 currencySymbol: '£', 83 dateFormat: 'DD/MM/YYYY', 84 phoneFormat: '+44', 85 commonCities: ['london', 'manchester', 'birmingham', 'glasgow', 'liverpool'], 86 }, 87 NZ: { 88 code: 'NZ', 89 name: 'New Zealand', 90 googleDomain: 'google.co.nz', 91 language: 'en', 92 currency: 'NZD', 93 currencySymbol: '$', 94 dateFormat: 'DD/MM/YYYY', 95 phoneFormat: '+64', 96 commonCities: ['auckland', 'wellington', 'christchurch', 'hamilton', 'dunedin'], 97 }, 98 CA: { 99 code: 'CA', 100 name: 'Canada', 101 googleDomain: 'google.ca', 102 language: 'en', 103 currency: 'CAD', 104 currencySymbol: '$', 105 dateFormat: 'DD/MM/YYYY', 106 phoneFormat: '+1', 107 commonCities: ['toronto', 'montreal', 'vancouver', 'calgary', 'ottawa'], 108 }, 109 }; 110 111 export function getCountryByCode(code) { 112 if (!code) throw new Error('Country code is required'); 113 const aliases = { GB: 'UK' }; 114 const lookupCode = aliases[code.toUpperCase()] || code.toUpperCase(); 115 return COUNTRIES[lookupCode] || null; // Returns null for unknown codes (e.g. 
'EU') 116 } 117 118 export function getCountryByGoogleDomain(domain) { 119 return Object.values(COUNTRIES).find(c => c.googleDomain === domain) || null; 120 } 121 ``` 122 123 ## SERP Scraping Updates 124 125 ### 5. Update `src/scrape.js` - ZenRows Integration 126 127 **Current:** 128 129 ```javascript 130 const response = await axios.get('https://api.zenrows.com/v1/', { 131 params: { 132 url: `https://www.google.com/search?q=${encodeURIComponent(keyword)}`, 133 apikey: zenrowsKey, 134 js_render: 'true', 135 }, 136 }); 137 ``` 138 139 **New:** 140 141 ```javascript 142 import { getCountryByCode } from './config/countries.js'; 143 144 export async function scrapeSERP(keyword, countryCode = 'AU') { 145 const country = getCountryByCode(countryCode); 146 if (!country) throw new Error(`Unsupported country code: ${countryCode}`); // getCountryByCode returns null for unknown codes 147 const response = await axios.get('https://api.zenrows.com/v1/', { 148 params: { 149 url: `https://www.${country.googleDomain}/search?q=${encodeURIComponent(keyword)}`, 150 apikey: zenrowsKey, 151 js_render: 'true', 152 // ZenRows geo-targeting parameters (proxy_country takes ISO 3166-1 codes, so the registry's 'UK' is sent as 'gb') 153 premium_proxy: 'true', 154 proxy_country: (country.code === 'UK' ? 'GB' : country.code).toLowerCase(), 155 }, 156 }); 157 158 // ... rest of scraping logic 159 160 // Return with country metadata 161 return { 162 results, 163 metadata: { 164 countryCode: country.code, 165 googleDomain: country.googleDomain, 166 language: country.language, 167 currency: country.currency, 168 }, 169 }; 170 } 171 ``` 172 173 ## Processing Pipeline Updates 174 175 ### 6. Update `src/process.js` - Store Country Metadata 176 177 ```javascript 178 // When inserting sites, include country data 179 const stmt = db.prepare(` 180 INSERT INTO sites ( 181 domain, landing_page_url, keyword, 182 country_code, google_domain, language_code, currency_code 183 ) VALUES (?, ?, ?, ?, ?, ?, ?) 184 `); 185 186 stmt.run( 187 domain, 188 url, 189 keyword, 190 metadata.countryCode, 191 metadata.googleDomain, 192 metadata.language, 193 metadata.currency 194 ); 195 ``` 196 197 ### 7. 
Update `src/poc.js` - Accept Country Parameter 198 199 ```javascript 200 async function main(keyword = 'plumber sydney', countryCode = 'AU') { 201 logger.info(`Running POC for keyword: "${keyword}" in ${countryCode}`); 202 203 const serpData = await scrapeSERP(keyword, countryCode); 204 // ... rest of pipeline 205 } 206 207 // CLI: node src/poc.js "plumber london" UK 208 if (import.meta.url === `file://${process.argv[1]}`) { 209 const keyword = process.argv[2] || 'plumber sydney'; 210 const countryCode = process.argv[3] || 'AU'; 211 main(keyword, countryCode); 212 } 213 ``` 214 215 ## Proposal Generation Updates 216 217 ### 8. Update `src/proposal-generator-v2.js` - Localized Proposals 218 219 ```javascript 220 import { getCountryByCode } from './config/countries.js'; 221 222 function buildProposalContext(siteData, scoreData, contactData, businessType) { 223 const country = getCountryByCode(siteData.country_code || 'AU'); 224 225 const weaknesses = extractKeyWeaknesses(scoreData); 226 const availableChannels = extractAvailableChannels(contactData); 227 228 return `Generate personalized sales proposals for this website: 229 230 TARGET BUSINESS: 231 - Domain: ${siteData.domain} 232 - URL: ${siteData.landing_page_url} 233 - Business Type: ${businessType} 234 - Location: ${extractLocation(siteData.keyword)} 235 - Keyword: "${siteData.keyword}" 236 - Country: ${country.name} (${country.code}) 237 - Language: ${country.language === 'en' ? 'English' : country.language} 238 - Currency: ${country.currencySymbol} ${country.currency} 239 240 CONVERSION ANALYSIS: 241 - Score: ${siteData.grade || 'N/A'} (${siteData.score || 'N/A'}/100) 242 243 KEY WEAKNESSES IDENTIFIED: 244 ${weaknesses} 245 246 AVAILABLE CONTACT CHANNELS: 247 ${availableChannels} 248 249 IMPORTANT LOCALIZATION REQUIREMENTS: 250 - Write proposals in ${country.language === 'en' ? 'English' : country.language} language 251 - Use ${country.name} spelling and terminology (e.g., ${['AU', 'UK', 'NZ'].includes(country.code) ? 
'optimise not optimize' : 'optimize not optimise'}) 252 - Reference prices in ${country.currencySymbol} ${country.currency} 253 - Use appropriate cultural references and business norms for ${country.name} 254 - Consider local timezone (estimated call times, response expectations) 255 - Use local date format: ${country.dateFormat} 256 257 Generate 3 different variants targeting different channels and angles.`; 258 } 259 ``` 260 261 ## Contact Prioritization Updates 262 263 ### 9. Update `src/contacts/prioritize.js` - Country-Specific Phone Validation 264 265 ```javascript 266 import { getCountryByCode } from '../config/countries.js'; 267 268 export function parseAvailableChannels(contactsJson, countryCode = 'AU') { 269 if (!contactsJson) { 270 return {}; 271 } 272 273 const country = getCountryByCode(countryCode); 274 const channels = {}; 275 276 // Email addresses (universal) 277 if (contactsJson.email && Array.isArray(contactsJson.email) && contactsJson.email.length > 0) { 278 channels.Email = contactsJson.email[0]; 279 } 280 281 // Phone numbers (country-specific validation) 282 if (contactsJson.phone && Array.isArray(contactsJson.phone) && contactsJson.phone.length > 0) { 283 const mobileNumbers = contactsJson.phone.filter(phone => isMobileNumber(phone, country)); 284 285 if (mobileNumbers.length > 0) { 286 channels.SMS = mobileNumbers[0]; 287 } else { 288 channels.SMS = contactsJson.phone[0]; 289 } 290 } 291 292 // ... 
rest of channel parsing 293 294 return channels; 295 } 296 297 function isMobileNumber(phone, country) { 298 // Country-specific mobile number patterns 299 const mobilePatterns = { 300 AU: /^(\+61|0)(4\d{8}|614\d{6})$/, // Australian mobiles start with 04 or +614 301 US: /^(\+1|1)?[2-9]\d{9}$/, // US mobile (hard to distinguish from landline) 302 UK: /^(\+44|0)(7\d{9}|447\d{8})$/, // UK mobiles start with 07 or +447 303 NZ: /^(\+64|0)(2[0-9]\d{7,8})$/, // NZ mobiles start with 02 or +642 304 CA: /^(\+1|1)?[2-9]\d{9}$/, // Canadian mobile (similar to US) 305 }; 306 307 const pattern = mobilePatterns[country.code] || mobilePatterns.AU; 308 return pattern.test(phone.replace(/\s+/g, '')); 309 } 310 ``` 311 312 ## Data Files Updates 313 314 ### 10. Create Country-Specific Keywords Files 315 316 ``` 317 data/ 318 keywords/ 319 au-keywords.txt # Australian keywords 320 us-keywords.txt # US keywords 321 uk-keywords.txt # UK keywords 322 nz-keywords.txt # NZ keywords 323 ca-keywords.txt # Canadian keywords 324 ``` 325 326 ### 11. Create Country-Specific Cities/Regions 327 328 ``` 329 data/ 330 regions/ 331 regions-au.txt # Sydney, Melbourne, Brisbane, etc. 332 regions-us.txt # New York, Los Angeles, Chicago, etc. 333 regions-uk.txt # London, Manchester, Birmingham, etc. 334 regions-nz.txt # Auckland, Wellington, Christchurch, etc. 335 regions-ca.txt # Toronto, Montreal, Vancouver, etc. 336 ``` 337 338 ## Keyword Manager Updates 339 340 ### 12. 
Update `src/utils/keyword-manager.js` 341 342 ```javascript 343 export function trackKeyword(db, keyword, serpResultCount, countryCode = 'AU') { 344 const country = getCountryByCode(countryCode); 345 346 const stmt = db.prepare(` 347 INSERT INTO keywords (keyword, serp_result_count, country_code, google_domain, last_scraped_at) 348 VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP) 349 ON CONFLICT(keyword, country_code) DO UPDATE SET 350 serp_result_count = excluded.serp_result_count, 351 last_scraped_at = CURRENT_TIMESTAMP 352 `); 353 354 stmt.run(keyword, serpResultCount, country.code, country.googleDomain); 355 } 356 ``` 357 358 ## Testing Strategy 359 360 ### 13. Multi-Country Integration Tests 361 362 Create `tests/multi-country.integration.test.js`: 363 364 ```javascript 365 describe('Multi-Country Support', () => { 366 it('should scrape Australian Google', async () => { 367 const data = await scrapeSERP('plumber sydney', 'AU'); 368 assert.equal(data.metadata.googleDomain, 'google.com.au'); 369 }); 370 371 it('should scrape US Google', async () => { 372 const data = await scrapeSERP('plumber seattle', 'US'); 373 assert.equal(data.metadata.googleDomain, 'google.com'); 374 }); 375 376 it('should generate UK-localized proposals', async () => { 377 const proposal = await generateProposal(siteId, 'UK'); 378 assert.match(proposal.text, /£/); // UK currency symbol 379 assert.match(proposal.text, /optimise/); // UK spelling 380 }); 381 }); 382 ``` 383 384 ## CLI Updates 385 386 ### 14. Update All CLI Commands to Accept Country Parameter 387 388 ```bash 389 # POC with country 390 npm run poc "plumber london" UK 391 392 # Process with country 393 node src/process.js --country=US 394 395 # Bulk proposals for specific country 396 node src/proposal-generator-v2.js bulk --country=AU 397 ``` 398 399 ## Configuration 400 401 ### 15. 
Environment Variables 402 403 Add to `.env`: 404 405 ```bash 406 ZENROWS_PREMIUM=true # Required for geo-targeting 407 ``` 408 409 ## Implementation Order 410 411 1. **Phase 1: Database & Config** (1-2h) 412 - Create migration script 413 - Add countries.js config 414 - Run migration 415 416 2. **Phase 2: SERP Scraping** (2-3h) 417 - Update scrape.js with country support 418 - Update poc.js and process.js 419 - Test with multiple countries 420 421 3. **Phase 3: Proposal Generation** (1-2h) 422 - Update proposal-generator-v2.js 423 - Add localization context 424 - Test language/currency handling 425 426 4. **Phase 4: Contact Prioritization** (1h) 427 - Update phone number validation 428 - Country-specific patterns 429 430 5. **Phase 5: Testing & Documentation** (1-2h) 431 - Integration tests 432 - Update README 433 - Create country-specific data files 434 435 ## Total Estimated Time: 6-10 hours 436 437 ## Benefits 438 439 1. **Market Expansion**: Support US, UK, Canada, NZ markets immediately 440 2. **Proper Localization**: Language, currency, cultural references 441 3. **Better Results**: Region-specific Google results 442 4. **Phone Validation**: Country-specific mobile number detection 443 5. **Scalability**: Easy to add new countries 444 445 ## GDPR Compliance for EU/UK Markets 446 447 ### Overview 448 449 EU/UK privacy rules (GDPR together with e-privacy legislation such as the UK's PECR) prohibit sending unsolicited marketing emails to sole traders and partnerships without their express consent. For EU/UK sites we must therefore exclude such businesses from email outreach and only email businesses we can verify as registered companies (Ltd, GmbH, etc.). 450 451 ### 16. 
Database Schema Updates 452 453 Add new columns to track company verification: 454 455 ```sql 456 ALTER TABLE sites ADD COLUMN company_proof TEXT; -- Full legal name + registration details 457 ALTER TABLE sites ADD COLUMN key_pages TEXT; -- JSON array of absolute URLs (Contact, Impressum, About, Legal) 458 ``` 459 460 **Example `company_proof` content:** 461 462 ``` 463 Acme Plumbing GmbH 464 Company Registration: HRB 12345 465 VAT ID: DE123456789 466 Registered Office: Berlin, Germany 467 ``` 468 469 **Example `key_pages` content:** 470 471 ```json 472 [ 473 "https://example.de/impressum", 474 "https://example.de/kontakt", 475 "https://example.de/about-us", 476 "https://example.de/legal" 477 ] 478 ``` 479 480 ### 17. Free Email Provider Filtering 481 482 **Step 1: Identify Free Email Providers** 483 484 Before any company verification, discard email addresses from known free providers: 485 486 ```javascript 487 const FREE_EMAIL_PROVIDERS = [ 488 'gmail.com', 489 'googlemail.com', 490 'outlook.com', 491 'hotmail.com', 492 'live.com', 493 'yahoo.com', 494 'yahoo.co.uk', 495 'yahoo.de', 496 'gmx.com', 497 'gmx.de', 498 'gmx.net', 499 'web.de', 500 'mail.com', 501 'icloud.com', 502 'me.com', 503 'aol.com', 504 'protonmail.com', 505 // Add more as needed 506 ]; 507 ``` 508 509 If email domain matches any of these, skip the email entirely. 510 511 ### 18. Company Verification Strategy 512 513 **Step 2: Search HTML DOM for Company Indicators** 514 515 Search the stored `full_html` column for: 516 517 #### Country-Specific Company Type Strings: 518 519 - **UK**: Ltd, Limited, PLC, LLP 520 - **Germany**: GmbH, AG, UG (haftungsbeschränkt), KG 521 - **France**: SARL, SA, SAS, EURL 522 - **Italy**: S.r.l., S.p.A., S.n.c. 523 - **Spain**: SL, SA, SRL 524 - **Netherlands**: BV, NV 525 - **Poland**: Sp. z o.o., S.A. 
526 - **Finland**: Oy, Oyj 527 - **Sweden**: AB 528 - **Austria**: GmbH, AG 529 530 #### Company Registration Indicators: 531 532 - "Company number" / "Unternehmensnummer" / "Numéro d'entreprise" 533 - "Company registration" / "Handelsregister" / "Registre du commerce" 534 - "VAT ID" / "VAT number" / "USt-IdNr." / "Numéro de TVA" / "Partita IVA" 535 - "Registered office" / "Registrierter Sitz" / "Siège social" 536 - "Trade register" / "Commercial register" 537 538 **Step 3: Extract Company Proof** 539 540 When company type strings are found: 541 542 1. Extract the full legal company name (e.g., "Acme Plumbing GmbH") 543 2. Extract any adjacent company registration numbers 544 3. Extract VAT IDs if present 545 4. Store all in `sites.company_proof` as plain text 546 547 **Step 4: Extract Key Pages** 548 549 While parsing the DOM, also extract links to these page types (in target language): 550 551 - **Impressum** (DE/AT): "impressum", "imprint" 552 - **Contact**: "contact", "kontakt", "contacto", "contatto", "nous contacter" 553 - **About**: "about", "über uns", "à propos", "chi siamo", "o nas" 554 - **Legal**: "legal", "rechtliches", "mentions légales", "note legali", "rechtliche hinweise" 555 - **Privacy**: "privacy", "datenschutz", "confidentialité", "privacy policy" 556 557 Store as JSON array of absolute URLs in `sites.key_pages`. 558 559 ### 19. Secondary Verification via Key Pages 560 561 **Step 5: If No Company Proof Found in Main HTML** 562 563 Visit the extracted key pages in order of likelihood: 564 565 1. **Impressum** (mandatory in DE/AT, highest likelihood) 566 2. **Legal** pages 567 3. **About** pages 568 4. 
**Contact** pages 569 570 For each page: 571 572 - Use browser automation (Playwright) to visit 573 - Search page content for same company type strings and registration indicators 574 - Stop at first successful match 575 - Update `sites.company_proof` with findings 576 577 **Priority order rationale:** 578 579 - Impressum pages are legally required in DE/AT and almost always contain company details 580 - Legal pages often have company registration info 581 - About pages sometimes mention company structure 582 - Contact pages occasionally list company details 583 584 ### 20. Handling Unverified Businesses 585 586 **Step 6: Mark Unverifiable Businesses** 587 588 If after DOM search + key page visits no company proof is found: 589 590 ```sql 591 UPDATE outreaches 592 SET delivery_status = 'email may be individual/partnership' 593 WHERE site_id = ? 594 AND contact_method = 'Email'; 595 ``` 596 597 These will NOT be sent emails unless/until verified. 598 599 **Future enhancement:** If sufficient volume accumulates, consider: 600 601 - Using ZenRows to fetch additional pages 602 - Calling public company registry APIs (e.g., UK Companies House API) 603 - Manual verification workflow for high-value prospects 604 605 ### 21. Country-Specific Implementation 606 607 Add to `src/config/countries.js`: 608 609 ```javascript 610 export const COUNTRIES = { 611 // ... 
existing countries 612 613 DE: { 614 code: 'DE', 615 name: 'Germany', 616 googleDomain: 'google.de', 617 language: 'de', 618 currency: 'EUR', 619 currencySymbol: '€', 620 requiresGDPRCheck: true, 621 companyTypes: ['GmbH', 'AG', 'UG (haftungsbeschränkt)', 'UG', 'KG', 'e.V.'], 622 companyKeywords: [ 623 'Handelsregister', 624 'Unternehmensnummer', 625 'USt-IdNr', 626 'Geschäftsführer', 627 'Registergericht', 628 ], 629 keyPageNames: ['impressum', 'kontakt', 'über uns', 'rechtliches'], 630 }, 631 632 UK: { 633 code: 'UK', 634 name: 'United Kingdom', 635 googleDomain: 'google.co.uk', 636 language: 'en', 637 currency: 'GBP', 638 currencySymbol: '£', 639 requiresGDPRCheck: true, 640 companyTypes: ['Ltd', 'Limited', 'PLC', 'LLP', 'Limited Liability Partnership'], 641 companyKeywords: [ 642 'Company number', 643 'Company registration', 644 'Registered in England', 645 'Registered office', 646 'VAT number', 647 ], 648 keyPageNames: ['contact', 'about', 'legal', 'company information'], 649 }, 650 651 FR: { 652 code: 'FR', 653 name: 'France', 654 googleDomain: 'google.fr', 655 language: 'fr', 656 currency: 'EUR', 657 currencySymbol: '€', 658 requiresGDPRCheck: true, 659 companyTypes: ['SARL', 'SA', 'SAS', 'EURL', 'SNC', 'SCS'], 660 companyKeywords: [ 661 'Numéro SIREN', 662 'Numéro SIRET', 663 'RCS', 664 'Capital social', 665 'Siège social', 666 'Numéro de TVA', 667 ], 668 keyPageNames: ['contact', 'à propos', 'mentions légales', 'informations légales'], 669 }, 670 671 // Add more EU countries as needed... 672 }; 673 ``` 674 675 ### 22. Processing Workflow 676 677 ``` 678 For each EU/UK site: 679 1. Extract email addresses from contacts_json 680 2. Filter out free email providers → discard if match 681 3. Search full_html for company type strings 682 4. If found → extract company_proof + key_pages → mark verified 683 5. 
If not found → visit key_pages in priority order 684 - Visit Impressum/Legal/About/Contact 685 - Search each for company indicators 686 - If found → extract company_proof → mark verified 687 6. If still not found → set delivery_status = 'email may be individual/partnership' 688 7. Only send emails to verified companies 689 ``` 690 691 ### 23. Reporting & Monitoring 692 693 Track verification success rates: 694 695 ```sql 696 SELECT 697 country_code, 698 COUNT(*) as total_sites, 699 SUM(CASE WHEN company_proof IS NOT NULL THEN 1 ELSE 0 END) as verified, 700 SUM(CASE WHEN company_proof IS NULL THEN 1 ELSE 0 END) as unverified, 701 ROUND(100.0 * SUM(CASE WHEN company_proof IS NOT NULL THEN 1 ELSE 0 END) / COUNT(*), 2) as verification_rate 702 FROM sites 703 WHERE country_code IN ('DE', 'UK', 'FR', 'IT', 'ES', 'NL', 'PL', 'AT') 704 GROUP BY country_code; 705 ``` 706 707 ## Future Enhancements 708 709 - Multi-language support (Spanish, French, etc.) 710 - Timezone-aware scheduling 711 - Country-specific pricing models 712 - Regional compliance (GDPR, CAN-SPAM equivalents) 713 - Integration with company registry APIs (UK Companies House, German Handelsregister, etc.) 714 - Automated re-verification for sites where company_proof expires