#!/usr/bin/env node

/**
 * Position Description Ingester (position-description-ingester.js)
 *
 * Analyzes job descriptions to extract key requirements, skills, and context
 * for tailoring CV content to specific opportunities. This enables the AI
 * enhancement system to emphasize relevant experience and skills.
 *
 * Features:
 * - Ingest job descriptions supplied as inline text or as a text file
 *   (files are read as UTF-8 text, so binary formats such as PDF are not
 *   decoded; URL ingestion is a placeholder that reports "not yet implemented")
 * - Extract key skills, technologies, and requirements
 * - Analyze company culture and values alignment
 * - Generate targeting insights for CV customization
 * - Store processed job descriptions for reuse
 *
 * Usage:
 *   node position-description-ingester.js --text "job description text"
 *   node position-description-ingester.js --url "https://company.com/job"
 *   node position-description-ingester.js --file "job-description.txt"
 *
 * NOTE(review): several log-message prefixes below, and two characters in the
 * list-item patterns, look like UTF-8 symbols that were decoded with the wrong
 * charset. The literals are kept exactly as found - confirm the file encoding
 * before normalizing them.
 *
 * @author Adrian Wedd
 * @version 1.0.0
 */

const fs = require('fs').promises;
const path = require('path');
const crypto = require('crypto');

/** Characters that must be escaped before literal text is embedded in a RegExp. */
const REGEX_SPECIAL_CHARS = /[.*+?^${}()|[\]\\]/g;

// List-item detection shared by the responsibilities/qualifications extractors.
// U+2022 (bullet) is accepted explicitly; the two characters after it are the
// ones already present in the original pattern and are kept for compatibility.
const BULLET_ITEM = /^[-\u2022ā¢*]\s+(.+)/;
const NUMBERED_ITEM = /^\d+\.\s+(.+)/;
const BULLET_PREFIX = /^[-\u2022ā¢*]\s*/;
const NUMBERED_PREFIX = /^\d+\.\s*/;

/**
 * Test whether `term` occurs in `lowerText` as a whole token.
 *
 * A plain substring test makes short names such as "r", "go" or "java" match
 * inside unrelated words ("requirements", "good", "javascript"). Here the term
 * must not be preceded or followed by an ASCII letter or digit; a single
 * trailing "s" is tolerated so simple plurals ("llms") still count.
 *
 * @param {string} lowerText - Text to search, already lower-cased.
 * @param {string} term - Term to look for (compared in lower case).
 * @returns {boolean} True when the term appears as a standalone token.
 */
function containsTerm(lowerText, term) {
  const escaped = term.toLowerCase().replace(REGEX_SPECIAL_CHARS, '\\$&');
  const pattern = new RegExp(`(?<![a-z0-9])${escaped}s?(?![a-z0-9])`);
  return pattern.test(lowerText);
}

/**
 * Collect the bulleted or numbered items of one section of a job description.
 *
 * Scanning is line based: a line matching `sectionStart` opens the section
 * (and is itself skipped, even when the section is already open); once open,
 * a line matching `sectionEnd` stops the scan; every other line that starts
 * with a bullet or "N." marker is collected with the marker removed.
 *
 * @param {string} text - Full job description.
 * @param {RegExp} sectionStart - Pattern identifying the section heading.
 * @param {RegExp} sectionEnd - Pattern identifying the start of a later section.
 * @returns {string[]} Item texts in document order (empty if none found).
 */
function collectListSection(text, sectionStart, sectionEnd) {
  const items = [];
  let inSection = false;

  for (const line of text.split('\n')) {
    const trimmed = line.trim();

    if (sectionStart.test(trimmed)) {
      inSection = true;
      continue;
    }
    if (!inSection) {
      continue;
    }
    if (sectionEnd.test(trimmed)) {
      break;
    }
    if (BULLET_ITEM.test(trimmed) || NUMBERED_ITEM.test(trimmed)) {
      items.push(trimmed.replace(BULLET_PREFIX, '').replace(NUMBERED_PREFIX, ''));
    }
  }

  return items;
}

/**
 * Keyword/regex based analyzer that turns a job description into structured
 * data and CV-targeting insights, and persists both as JSON files.
 */
class PositionDescriptionIngester {
  /**
   * Set up output locations (relative to this file: ../../data/positions and
   * ../../data/targeting) and the keyword tables used for classification.
   */
  constructor() {
    this.dataDir = path.resolve(__dirname, '../../data');
    this.positionsDir = path.join(this.dataDir, 'positions');
    this.outputDir = path.join(this.dataDir, 'targeting');

    // Skill categories for classification
    this.skillCategories = {
      'programming': [
        'python', 'javascript', 'typescript', 'java', 'c++', 'c#', 'rust', 'go', 'php',
        'ruby', 'swift', 'kotlin', 'scala', 'r', 'matlab', 'sql', 'html', 'css'
      ],
      'frameworks': [
        'react', 'vue', 'angular', 'django', 'flask', 'spring', 'express', 'fastapi',
        'tensorflow', 'pytorch', 'scikit-learn', 'pandas', 'numpy', 'bootstrap'
      ],
      'cloud_platforms': [
        'aws', 'azure', 'gcp', 'google cloud', 'digitalocean', 'heroku', 'vercel'
      ],
      'devops': [
        'docker', 'kubernetes', 'jenkins', 'github actions', 'ci/cd', 'terraform',
        'ansible', 'puppet', 'chef', 'vagrant', 'monitoring', 'logging'
      ],
      'databases': [
        'postgresql', 'mysql', 'mongodb', 'redis', 'elasticsearch', 'cassandra',
        'dynamodb', 'sqlite', 'oracle', 'sql server'
      ],
      'ai_ml': [
        'machine learning', 'artificial intelligence', 'deep learning', 'nlp',
        'computer vision', 'data science', 'neural networks', 'transformers',
        'llm', 'generative ai', 'gpt', 'claude', 'openai'
      ],
      'systems': [
        'linux', 'unix', 'windows server', 'networking', 'security', 'cybersecurity',
        'system administration', 'infrastructure', 'performance tuning'
      ],
      'methodologies': [
        'agile', 'scrum', 'kanban', 'devops', 'microservices', 'api design',
        'system design', 'architecture', 'testing', 'tdd', 'bdd'
      ]
    };

    // Experience level indicators (checked in this order; first hit wins)
    this.experienceLevels = {
      'entry': ['junior', 'entry', 'graduate', 'associate', '0-2 years', 'new grad'],
      'mid': ['mid', 'intermediate', '2-5 years', '3-7 years', 'experienced'],
      'senior': ['senior', 'lead', 'principal', '5+ years', '7+ years', 'expert'],
      'management': ['manager', 'director', 'head of', 'vp', 'chief', 'team lead']
    };

    // Company culture indicators
    this.cultureIndicators = {
      'innovation': ['innovative', 'cutting-edge', 'pioneering', 'disruptive', 'startup'],
      'growth': ['scaling', 'growing', 'expanding', 'fast-paced', 'dynamic'],
      'collaboration': ['team', 'collaborative', 'cross-functional', 'partnership'],
      'impact': ['mission', 'purpose', 'social impact', 'meaningful', 'change'],
      'stability': ['established', 'stable', 'mature', 'enterprise', 'fortune']
    };
  }

  /**
   * Initialize the ingestion system by creating the positions and targeting
   * directories (including missing parents).
   *
   * @returns {Promise<void>}
   */
  async initialize() {
    console.log('šÆ Initializing Position Description Ingester...');

    // Create directories if they don't exist
    await fs.mkdir(this.positionsDir, { recursive: true });
    await fs.mkdir(this.outputDir, { recursive: true });

    console.log('ā Directory structure initialized');
  }

  /**
   * Ingest a job description from text: analyze it, derive an ID, and write
   * both the analysis and the targeting-insights files.
   *
   * @param {string} jobText - Raw job description.
   * @param {object} [metadata={}] - Extra metadata merged into the analysis
   *   and included in the job-ID hash.
   * @returns {Promise<{jobId: string, analysis: object}>}
   */
  async ingestFromText(jobText, metadata = {}) {
    console.log('š Processing job description from text...');

    const analysis = await this.analyzeJobDescription(jobText, metadata);
    const jobId = this.generateJobId(jobText, metadata);

    await this.saveJobAnalysis(jobId, analysis);
    await this.generateTargetingInsights(jobId, analysis);

    console.log(`ā Job analysis completed: ${jobId}`);
    return { jobId, analysis };
  }

  /**
   * Ingest a job description from a URL. Not implemented: the method only
   * logs guidance and returns an error result; no request is made.
   *
   * @param {string} url - Job posting URL (only echoed in the log).
   * @returns {Promise<{jobId: null, analysis: null, error: string}>}
   */
  async ingestFromUrl(url) {
    console.log(`š Processing job description from URL: ${url}`);

    try {
      // For now, this is a placeholder for web scraping functionality
      // In the future, this could use Playwright or similar tools
      console.log('ā ļø URL ingestion requires web scraping implementation');
      console.log('š” For now, please copy the job description text and use --text option');

      return { jobId: null, analysis: null, error: 'URL ingestion not yet implemented' };
    } catch (error) {
      console.error('ā Failed to ingest from URL:', error.message);
      return { jobId: null, analysis: null, error: error.message };
    }
  }

  /**
   * Ingest a job description from a file. The file is read as UTF-8 text and
   * handed to {@link ingestFromText}. Any failure (reading or processing) is
   * logged and reported through the `error` field instead of being thrown.
   *
   * @param {string} filePath - Path of the text file to read.
   * @returns {Promise<{jobId: (string|null), analysis: (object|null), error?: string}>}
   */
  async ingestFromFile(filePath) {
    console.log(`š Processing job description from file: ${filePath}`);

    try {
      const content = await fs.readFile(filePath, 'utf8');
      const metadata = {
        source: 'file',
        file_path: filePath,
        file_name: path.basename(filePath)
      };

      return await this.ingestFromText(content, metadata);
    } catch (error) {
      console.error('ā Failed to read file:', error.message);
      return { jobId: null, analysis: null, error: error.message };
    }
  }

  /**
   * Analyze job description content.
   *
   * @param {string} text - Raw job description.
   * @param {object} [metadata={}] - Caller metadata. It is spread last into
   *   `analysis.metadata`, so its keys override the generated defaults.
   * @returns {Promise<object>} Object with `metadata`, `raw_text`,
   *   `extracted_data` and `targeting_insights`.
   */
  async analyzeJobDescription(text, metadata = {}) {
    const analysis = {
      metadata: {
        analyzed_at: new Date().toISOString(),
        source: metadata.source || 'text',
        ingester_version: '1.0.0',
        ...metadata
      },
      raw_text: text,
      extracted_data: {},
      targeting_insights: {}
    };

    // Extract basic information
    analysis.extracted_data = {
      job_title: this.extractJobTitle(text),
      company: this.extractCompany(text),
      location: this.extractLocation(text),
      experience_level: this.classifyExperienceLevel(text),
      employment_type: this.extractEmploymentType(text),
      salary_range: this.extractSalaryRange(text)
    };

    // Extract skills and technologies
    analysis.extracted_data.required_skills = this.extractSkills(text, 'required');
    analysis.extracted_data.preferred_skills = this.extractSkills(text, 'preferred');
    analysis.extracted_data.technology_stack = this.categorizeSkills(
      [...analysis.extracted_data.required_skills, ...analysis.extracted_data.preferred_skills]
    );

    // Extract responsibilities and requirements
    analysis.extracted_data.key_responsibilities = this.extractResponsibilities(text);
    analysis.extracted_data.qualifications = this.extractQualifications(text);

    // Analyze company culture
    analysis.extracted_data.culture_indicators = this.analyzeCulture(text);

    // Generate matching insights
    analysis.targeting_insights = await this.generateMatchingInsights(analysis.extracted_data);

    return analysis;
  }

  /**
   * Extract job title from text. Tries labelled patterns first, then a
   * role-keyword scan of the first five lines.
   *
   * @param {string} text - Raw job description.
   * @returns {string} The title, or 'Unknown Position' when nothing matches.
   */
  extractJobTitle(text) {
    // Look for common job title patterns
    const titlePatterns = [
      /(?:position|role|job|title):\s*([^\n]+)/i,
      /^([^\n]+(?:engineer|developer|analyst|manager|director|specialist|coordinator))/i,
      /job title:\s*([^\n]+)/i
    ];

    for (const pattern of titlePatterns) {
      const match = text.match(pattern);
      if (match) {
        return match[1].trim();
      }
    }

    // Fallback: look for capitalized words that might be job titles
    const lines = text.split('\n').slice(0, 5); // Check first few lines
    for (const line of lines) {
      if (line.length < 100 && /[A-Z]/.test(line) &&
          (line.includes('Engineer') || line.includes('Developer') ||
           line.includes('Analyst') || line.includes('Manager'))) {
        return line.trim();
      }
    }

    return 'Unknown Position';
  }

  /**
   * Extract company name from text.
   *
   * @param {string} text - Raw job description.
   * @returns {string} The company, or 'Unknown Company' when nothing matches.
   */
  extractCompany(text) {
    const companyPatterns = [
      /company:\s*([^\n]+)/i,
      /employer:\s*([^\n]+)/i,
      /organization:\s*([^\n]+)/i,
      /(?:at|join)\s+([A-Z][a-zA-Z\s&]+?)(?:\s+is|,|\.|$)/
    ];

    for (const pattern of companyPatterns) {
      const match = text.match(pattern);
      if (match) {
        return match[1].trim();
      }
    }

    return 'Unknown Company';
  }

  /**
   * Extract location from text.
   *
   * @param {string} text - Raw job description.
   * @returns {string} The location, or 'Location not specified'.
   */
  extractLocation(text) {
    const locationPatterns = [
      /location:\s*([^\n]+)/i,
      /based in:\s*([^\n]+)/i,
      /(?:remote|hybrid|onsite).*?([A-Z][a-z]+,\s*[A-Z]{2,})/,
      /([A-Z][a-z]+,\s*(?:Australia|USA|UK|Canada|Germany))/i
    ];

    for (const pattern of locationPatterns) {
      const match = text.match(pattern);
      if (match) {
        return match[1].trim();
      }
    }

    return 'Location not specified';
  }

  /**
   * Extract employment type from text using case-insensitive substring
   * checks; the first matching type in the order below wins.
   *
   * @param {string} text - Raw job description.
   * @returns {string} 'full-time', 'part-time', 'contract', 'freelance',
   *   'internship', or 'not specified'.
   */
  extractEmploymentType(text) {
    const lowerText = text.toLowerCase();

    if (lowerText.includes('full-time') || lowerText.includes('full time')) {
      return 'full-time';
    } else if (lowerText.includes('part-time') || lowerText.includes('part time')) {
      return 'part-time';
    } else if (lowerText.includes('contract') || lowerText.includes('contractor')) {
      return 'contract';
    } else if (lowerText.includes('freelance') || lowerText.includes('consultant')) {
      return 'freelance';
    } else if (lowerText.includes('intern') || lowerText.includes('internship')) {
      return 'internship';
    }

    return 'not specified';
  }

  /**
   * Extract salary range from text.
   *
   * For the labelled patterns ("salary:", "compensation:") only the captured
   * value is returned, without the label; the unlabelled patterns return the
   * whole match.
   *
   * @param {string} text - Raw job description.
   * @returns {string} The salary text, or 'not specified'.
   */
  extractSalaryRange(text) {
    // No global flag: with /g, String.prototype.match returns every match and
    // index 1 would be the second match rather than the first capture group.
    const salaryPatterns = [
      /\$[\d,]+\s*-\s*\$[\d,]+/,
      /salary:\s*([^\n]+)/i,
      /compensation:\s*([^\n]+)/i,
      /[\d,]+k?\s*-\s*[\d,]+k?\s*(?:AUD|USD|per year|annually)/i
    ];

    for (const pattern of salaryPatterns) {
      const match = text.match(pattern);
      if (match) {
        return (match[1] ?? match[0]).trim();
      }
    }

    return 'not specified';
  }

  /**
   * Classify experience level by case-insensitive substring search of the
   * indicator lists, in the order entry, mid, senior, management.
   *
   * @param {string} text - Raw job description.
   * @returns {string} The first level with a matching indicator, or
   *   'unspecified'.
   */
  classifyExperienceLevel(text) {
    const lowerText = text.toLowerCase();

    for (const [level, indicators] of Object.entries(this.experienceLevels)) {
      for (const indicator of indicators) {
        if (lowerText.includes(indicator)) {
          return level;
        }
      }
    }

    return 'unspecified';
  }

  /**
   * Extract skills from text: every entry of `skillCategories` that appears
   * in the text as a whole token (see `containsTerm`).
   *
   * @param {string} text - Raw job description.
   * @param {string} [_type='all'] - Currently unused, so "required" and
   *   "preferred" extraction return the same list.
   * @returns {string[]} Distinct skill names in category order.
   */
  extractSkills(text, _type = 'all') {
    const skills = new Set();
    const lowerText = text.toLowerCase();

    // Check all skill categories
    for (const skillList of Object.values(this.skillCategories)) {
      for (const skill of skillList) {
        if (containsTerm(lowerText, skill)) {
          skills.add(skill);
        }
      }
    }

    return Array.from(skills);
  }

  /**
   * Categorize skills by type.
   *
   * @param {string[]} skills - Skill names (duplicates are preserved).
   * @returns {Object<string, string[]>} One key per skill category holding
   *   the supplied skills that belong to it (case-insensitive comparison).
   */
  categorizeSkills(skills) {
    const categorized = {};

    for (const [category, skillList] of Object.entries(this.skillCategories)) {
      categorized[category] = skills.filter(skill =>
        skillList.some(s => s.toLowerCase() === skill.toLowerCase())
      );
    }

    return categorized;
  }

  /**
   * Extract key responsibilities: list items that follow a line mentioning
   * responsibilities/duties, up to a line mentioning requirements,
   * qualifications or skills.
   *
   * @param {string} text - Raw job description.
   * @returns {string[]} Responsibility items without their list markers.
   */
  extractResponsibilities(text) {
    return collectListSection(
      text,
      /responsibilities|duties|role includes/i,
      /requirements|qualifications|skills/i
    );
  }

  /**
   * Extract qualifications: list items that follow a line mentioning
   * requirements/qualifications, up to a line mentioning benefits,
   * compensation or "about us".
   *
   * @param {string} text - Raw job description.
   * @returns {string[]} Qualification items without their list markers.
   */
  extractQualifications(text) {
    return collectListSection(
      text,
      /requirements|qualifications|must have|essential/i,
      /benefits|compensation|about us/i
    );
  }

  /**
   * Analyze company culture indicators. Each trait scores one point per
   * indicator word found (case-insensitive substring search).
   *
   * @param {string} text - Raw job description.
   * @returns {{scores: Object<string, number>, dominant_traits: string[]}}
   *   All scores, plus up to three traits with a non-zero score, highest first.
   */
  analyzeCulture(text) {
    const cultureScore = {};
    const lowerText = text.toLowerCase();

    for (const [culture, indicators] of Object.entries(this.cultureIndicators)) {
      let score = 0;
      for (const indicator of indicators) {
        if (lowerText.includes(indicator)) {
          score++;
        }
      }
      cultureScore[culture] = score;
    }

    // Find dominant culture traits
    const sortedCulture = Object.entries(cultureScore)
      .sort(([, a], [, b]) => b - a)
      .filter(([, score]) => score > 0);

    return {
      scores: cultureScore,
      dominant_traits: sortedCulture.slice(0, 3).map(([trait]) => trait)
    };
  }

  /**
   * Generate matching insights by comparing the extracted job data with the
   * CV stored in `<dataDir>/base-cv.json`. If that file cannot be read or
   * parsed, a warning is logged and an empty CV is used.
   *
   * @param {object} extractedData - `extracted_data` of an analysis.
   * @returns {Promise<object>} Skill, experience, culture and recommendation
   *   insights.
   */
  async generateMatchingInsights(extractedData) {
    // Load current CV data for comparison
    let cvData = {};
    try {
      const cvPath = path.join(this.dataDir, 'base-cv.json');
      const content = await fs.readFile(cvPath, 'utf8');
      cvData = JSON.parse(content);
    } catch {
      console.warn('ā ļø Could not load CV data for matching analysis');
    }

    const insights = {
      skill_matches: this.analyzeSkillMatches(extractedData, cvData),
      experience_alignment: this.analyzeExperienceAlignment(extractedData, cvData),
      culture_fit: this.analyzeCultureFit(extractedData, cvData),
      enhancement_recommendations: this.generateEnhancementRecommendations(extractedData, cvData)
    };

    return insights;
  }

  /**
   * Analyze skill matches between job and CV.
   *
   * CV skills may be objects with a string `name` or plain strings; entries
   * of any other shape are ignored rather than aborting the analysis.
   *
   * @param {object} jobData - Needs `required_skills` and `preferred_skills`.
   * @param {object} cvData - Parsed CV; `skills` is optional.
   * @returns {{required_matches: string[], preferred_matches: string[],
   *   missing_required: string[], match_percentage: number}}
   */
  analyzeSkillMatches(jobData, cvData) {
    const cvSkills = new Set();

    // Extract skills from CV
    const skillEntries = Array.isArray(cvData?.skills) ? cvData.skills : [];
    for (const entry of skillEntries) {
      const name = typeof entry === 'string' ? entry : entry?.name;
      if (typeof name === 'string') {
        cvSkills.add(name.toLowerCase());
      }
    }

    const isOnCv = (skill) => cvSkills.has(skill.toLowerCase());
    const requiredMatches = jobData.required_skills.filter(isOnCv);
    const preferredMatches = jobData.preferred_skills.filter(isOnCv);
    const missingRequired = jobData.required_skills.filter((skill) => !isOnCv(skill));

    return {
      required_matches: requiredMatches,
      preferred_matches: preferredMatches,
      missing_required: missingRequired,
      // Math.max guards the division when the job lists no required skills.
      match_percentage: requiredMatches.length / Math.max(jobData.required_skills.length, 1) * 100
    };
  }

  /**
   * Analyze experience alignment. When the CV has an `experience` array, the
   * years of every entry are summed (overlapping periods are counted twice)
   * and compared with the job's experience level.
   *
   * @param {object} jobData - Needs `experience_level`.
   * @param {object} cvData - Parsed CV; `experience` is optional.
   * @returns {object} `level_match`, `relevant_experience`,
   *   `transferable_skills`, and `total_years` when experience was available.
   */
  analyzeExperienceAlignment(jobData, cvData) {
    const alignment = {
      level_match: false,
      relevant_experience: [],
      transferable_skills: []
    };

    // Check experience level alignment
    if (Array.isArray(cvData?.experience)) {
      const totalYears = cvData.experience.reduce(
        (total, exp) => total + this.calculateYears(exp?.period),
        0
      );

      alignment.level_match = this.matchesExperienceLevel(jobData.experience_level, totalYears);
      alignment.total_years = totalYears;
    }

    return alignment;
  }

  /**
   * Analyze culture fit. Simplified: only copies the job's dominant culture
   * traits into `identified_culture`; the CV is not consulted.
   *
   * @param {object} jobData - May carry `culture_indicators.dominant_traits`.
   * @param {object} _cvData - Unused.
   * @returns {object} Culture-fit summary.
   */
  analyzeCultureFit(jobData, _cvData) {
    const cultureFit = {
      alignment_score: 0,
      matching_values: [],
      recommendations: []
    };

    // This is a simplified culture analysis
    // In practice, this would involve more sophisticated matching
    if (jobData.culture_indicators?.dominant_traits) {
      cultureFit.identified_culture = jobData.culture_indicators.dominant_traits;
    }

    return cultureFit;
  }

  /**
   * Generate enhancement recommendations.
   *
   * @param {object} jobData - Uses `required_skills` and `key_responsibilities`.
   * @param {object} _cvData - Unused.
   * @returns {object[]} Recommendation records (`type`, `priority`, `action`
   *   plus type-specific details).
   */
  generateEnhancementRecommendations(jobData, _cvData) {
    const recommendations = [];

    // Skill gap recommendations
    if (jobData.required_skills) {
      recommendations.push({
        type: 'skills',
        priority: 'high',
        action: 'Emphasize matching skills in professional summary',
        skills: jobData.required_skills.slice(0, 5)
      });
    }

    // Experience emphasis
    recommendations.push({
      type: 'experience',
      priority: 'medium',
      action: 'Highlight relevant experience that matches job responsibilities',
      focus_areas: jobData.key_responsibilities?.slice(0, 3) || []
    });

    return recommendations;
  }

  /**
   * Generate a job ID of the form `job-YYYY-MM-DD-xxxxxxxx`: the current UTC
   * date followed by the first 8 hex digits of an MD5 over the text and the
   * JSON-serialized metadata. The hash is an identifier, not a security
   * measure.
   *
   * @param {string} text - Raw job description.
   * @param {object} metadata - Metadata included in the hash.
   * @returns {string} The job ID.
   */
  generateJobId(text, metadata) {
    const content = text + JSON.stringify(metadata);
    const hash = crypto.createHash('md5').update(content).digest('hex');
    const timestamp = new Date().toISOString().slice(0, 10);
    return `job-${timestamp}-${hash.slice(0, 8)}`;
  }

  /**
   * Save job analysis results as `<positionsDir>/<jobId>.json`.
   *
   * @param {string} jobId - ID used as the file name.
   * @param {object} analysis - Analysis to serialize.
   * @returns {Promise<void>}
   */
  async saveJobAnalysis(jobId, analysis) {
    const filePath = path.join(this.positionsDir, `${jobId}.json`);
    await fs.writeFile(filePath, JSON.stringify(analysis, null, 2), 'utf8');
    console.log(`š¾ Job analysis saved: ${filePath}`);
  }

  /**
   * Generate the targeting insights file `targeting-<jobId>.json` in the
   * output directory and overwrite `latest-targeting.json` with the same
   * content.
   *
   * @param {string} jobId - ID of the analyzed job.
   * @param {object} analysis - Result of {@link analyzeJobDescription}.
   * @returns {Promise<void>}
   */
  async generateTargetingInsights(jobId, analysis) {
    const insights = {
      job_id: jobId,
      generated_at: new Date().toISOString(),
      targeting_profile: {
        position: analysis.extracted_data.job_title,
        company: analysis.extracted_data.company,
        key_skills: analysis.extracted_data.required_skills.slice(0, 10),
        experience_level: analysis.extracted_data.experience_level,
        culture_traits: analysis.extracted_data.culture_indicators?.dominant_traits || []
      },
      cv_customization: analysis.targeting_insights
    };

    const insightsPath = path.join(this.outputDir, `targeting-${jobId}.json`);
    await fs.writeFile(insightsPath, JSON.stringify(insights, null, 2), 'utf8');

    // Also save as latest targeting insights
    const latestPath = path.join(this.outputDir, 'latest-targeting.json');
    await fs.writeFile(latestPath, JSON.stringify(insights, null, 2), 'utf8');

    console.log(`šÆ Targeting insights saved: ${insightsPath}`);
  }

  /**
   * Compute the number of years covered by a period such as "2018 - 2021" or
   * "2019 - Present". Hyphen, en dash and em dash separators are accepted and
   * "present" is matched case-insensitively (resolved to the current year).
   *
   * @param {string} period - Period text from a CV experience entry.
   * @returns {number} End year minus start year, or 0 when `period` is not a
   *   string or contains no recognizable year range.
   */
  calculateYears(period) {
    if (typeof period !== 'string') {
      return 0;
    }

    // Simple year calculation - in practice this would be more sophisticated
    const yearMatch = period.match(/(\d{4})\s*[-\u2013\u2014]\s*(\d{4}|present)/i);
    if (!yearMatch) {
      return 0;
    }

    const start = Number.parseInt(yearMatch[1], 10);
    const end = /^present$/i.test(yearMatch[2])
      ? new Date().getFullYear()
      : Number.parseInt(yearMatch[2], 10);
    return end - start;
  }

  /**
   * Check whether a total number of years falls inside the (inclusive) range
   * associated with a job level. The ranges overlap at their boundaries.
   *
   * @param {string} jobLevel - 'entry', 'mid', 'senior' or 'management'.
   * @param {number} totalYears - Years of experience.
   * @returns {boolean} False for unknown levels.
   */
  matchesExperienceLevel(jobLevel, totalYears) {
    const levelRanges = {
      'entry': [0, 2],
      'mid': [2, 7],
      'senior': [7, Infinity],
      'management': [5, Infinity]
    };

    if (levelRanges[jobLevel]) {
      const [min, max] = levelRanges[jobLevel];
      return totalYears >= min && totalYears <= max;
    }

    return false;
  }
}

/**
 * CLI interface. Handles `--text`, `--url` and `--file` (first match in that
 * order) and prints usage otherwise. The data directories are created before
 * the arguments are inspected. Exits with status 1 on a missing argument
 * value or a reported ingestion error.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const ingester = new PositionDescriptionIngester();
  await ingester.initialize();

  const args = process.argv.slice(2);

  if (args.includes('--text')) {
    const textIndex = args.indexOf('--text');
    const jobText = args[textIndex + 1];
    if (!jobText) {
      console.error('ā Please provide job description text after --text');
      process.exit(1);
    }

    const result = await ingester.ingestFromText(jobText, { source: 'cli_text' });
    console.log(`\nā Processing complete. Job ID: ${result.jobId}`);

  } else if (args.includes('--url')) {
    const urlIndex = args.indexOf('--url');
    const url = args[urlIndex + 1];
    if (!url) {
      console.error('ā Please provide URL after --url');
      process.exit(1);
    }

    const result = await ingester.ingestFromUrl(url);
    if (result.error) {
      console.error(`ā ${result.error}`);
      process.exit(1);
    }

  } else if (args.includes('--file')) {
    const fileIndex = args.indexOf('--file');
    const filePath = args[fileIndex + 1];
    if (!filePath) {
      console.error('ā Please provide file path after --file');
      process.exit(1);
    }

    const result = await ingester.ingestFromFile(filePath);
    if (result.error) {
      console.error(`ā ${result.error}`);
      process.exit(1);
    }
    console.log(`\nā Processing complete. Job ID: ${result.jobId}`);

  } else {
    console.log('Position Description Ingester');
    console.log('');
    console.log('Usage:');
    console.log(' node position-description-ingester.js --text "job description text"');
    console.log(' node position-description-ingester.js --url "https://company.com/job"');
    console.log(' node position-description-ingester.js --file "job-description.txt"');
    console.log('');
    console.log('The ingester will analyze the job description and generate targeting');
    console.log('insights to help customize your CV for the specific opportunity.');
  }
}

if (require.main === module) {
  main().catch((error) => {
    console.error(error);
    // Report unexpected failures to the calling shell instead of exiting 0.
    process.exitCode = 1;
  });
}

module.exports = PositionDescriptionIngester;