# sentiment_analyzer.py
"""
Sentiment Analyzer Module

NLP-powered market sentiment analysis for financial assets.
"""

import re
from typing import List, Dict, Any, Optional
from collections import defaultdict


class SentimentAnalyzer:
    """Analyzes market sentiment from various sources.

    Scoring is purely lexicon based: each text is tokenized, words found in
    ``positive_words`` / ``negative_words`` are counted, an immediately
    preceding intensity modifier scales the hit, and an immediately
    preceding negation word flips its polarity. Lexicon matching is exact
    (no stemming), so "beats" does not match the entry "beat".

    News is currently simulated by ``_get_news``.
    """

    # A token is a run of word characters, optionally followed by one
    # apostrophe suffix, so contractions such as "isn't" stay in one piece
    # and can be looked up in ``negation_words``.
    _TOKEN_RE = re.compile(r"\w+(?:'\w+)?")

    # Keywords this short are treated as acronyms and must match a whole
    # word; as raw substrings they fire on unrelated words
    # ("sec" in "sector", "eps" in "steps", "fed" in "offered").
    _ACRONYM_MAX_LEN = 3

    # Topic name -> keywords that signal the topic (all lower case).
    _TOPIC_KEYWORDS = {
        "earnings": ["earnings", "revenue", "profit", "eps", "quarterly"],
        "guidance": ["guidance", "outlook", "forecast", "expect"],
        "management": ["ceo", "cfo", "executive", "leadership", "board"],
        "product": ["product", "launch", "release", "innovation", "feature"],
        "competition": ["competitor", "market share", "rivalry", "compete"],
        "regulation": ["sec", "ftc", "regulation", "compliance", "legal"],
        "macro": ["fed", "interest rate", "inflation", "gdp", "economy"],
        "insider": ["insider", "buyback", "dividend", "acquisition", "merger"],
    }

    def __init__(self):
        """Initialize sentiment analyzer with lexicons."""
        # Financial sentiment lexicon
        self.positive_words = {
            "bullish", "growth", "profit", "gain", "surge", "rally", "upgrade",
            "outperform", "buy", "strong", "beat", "exceed", "record", "high",
            "momentum", "breakout", "accumulate", "opportunity", "upside",
            "recovery", "expansion", "innovative", "leader", "breakthrough"
        }

        self.negative_words = {
            "bearish", "loss", "decline", "drop", "crash", "sell", "downgrade",
            "underperform", "weak", "miss", "below", "low", "risk", "concern",
            "warning", "fear", "volatile", "uncertainty", "recession", "debt",
            "layoff", "restructure", "lawsuit", "investigation", "fraud"
        }

        # Multiplier applied to the next lexicon word.
        self.intensity_modifiers = {
            "very": 1.5, "extremely": 2.0, "slightly": 0.5, "somewhat": 0.7,
            "significantly": 1.8, "substantially": 1.6, "marginally": 0.3
        }

        # Words that flip the polarity of the next lexicon word.
        self.negation_words = {
            "not", "no", "never", "neither", "nobody", "nothing", "nowhere",
            "hardly", "barely", "doesn't", "don't", "won't", "isn't", "aren't"
        }

    def analyze(
        self,
        symbols: List[str],
        sources: Optional[List[str]] = None
    ) -> Dict[str, Any]:
        """
        Analyze market sentiment for given symbols.

        Args:
            symbols: List of stock symbols to analyze
            sources: Optional list of news sources to focus on. Matching is
                case-insensitive against each item's "source" field.
                ``None`` or an empty list means "use every source".

        Returns:
            Dict with two keys:
            "symbol_sentiment" maps each input symbol to its per-symbol
            summary (see ``_summarize_news``);
            "market_overview" holds the mean of the per-symbol (rounded)
            sentiments, its label, and bullish/bearish/neutral counts.
        """
        wanted_sources = {name.lower() for name in sources} if sources else None

        results = {}
        for symbol in symbols:
            # Get simulated news data for the symbol
            news_items = self._get_news(symbol)
            if wanted_sources is not None:
                news_items = [
                    item for item in news_items
                    if item["source"].lower() in wanted_sources
                ]
            results[symbol] = self._summarize_news(news_items)

        # Market-wide sentiment is averaged over the rounded per-symbol values.
        all_scores = [data["overall_sentiment"] for data in results.values()]
        market_sentiment = sum(all_scores) / len(all_scores) if all_scores else 0

        return {
            "symbol_sentiment": results,
            "market_overview": {
                "average_sentiment": round(market_sentiment, 3),
                "label": self._get_label(market_sentiment),
                "bullish_count": sum(1 for s in all_scores if s > 0.1),
                "bearish_count": sum(1 for s in all_scores if s < -0.1),
                "neutral_count": sum(1 for s in all_scores if -0.1 <= s <= 0.1)
            }
        }

    def _summarize_news(self, news_items: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Score every news item and aggregate them into one symbol summary.

        Each item must provide "text", "headline" and "source". An empty
        list yields a neutral summary with confidence 0.0.
        """
        sentiment_scores = []
        topics = defaultdict(int)
        key_phrases = []

        for item in news_items:
            score = self._analyze_text(item["text"])
            sentiment_scores.append(score)

            # Count how many items mention each topic
            for topic in self._extract_topics(item["text"]):
                topics[topic] += 1

            # Keep the headline only if the sentiment is significant
            if abs(score) > 0.3:
                key_phrases.append({
                    "text": item["headline"],
                    "sentiment": round(score, 3),
                    "source": item["source"]
                })

        count = len(sentiment_scores)
        avg_sentiment = sum(sentiment_scores) / count if count else 0
        # Population standard deviation; defined as 0 for fewer than 2 items.
        sentiment_std = (
            (sum((s - avg_sentiment) ** 2 for s in sentiment_scores) / count) ** 0.5
            if count > 1 else 0
        )
        # With no items there is no evidence at all, so report zero
        # confidence rather than the 1.0 a zero spread would imply.
        confidence = round(1 - min(sentiment_std, 1), 3) if count else 0.0

        return {
            "overall_sentiment": round(avg_sentiment, 3),
            "sentiment_label": self._get_label(avg_sentiment),
            "confidence": confidence,
            "news_volume": len(news_items),
            "sentiment_distribution": {
                "positive": sum(1 for s in sentiment_scores if s > 0.1),
                "neutral": sum(1 for s in sentiment_scores if -0.1 <= s <= 0.1),
                "negative": sum(1 for s in sentiment_scores if s < -0.1)
            },
            "top_topics": dict(sorted(topics.items(), key=lambda x: -x[1])[:5]),
            "key_phrases": key_phrases[:5],
            "trend": self._calculate_trend(sentiment_scores)
        }

    def _analyze_text(self, text: str) -> float:
        """
        Analyze sentiment of a single text.

        A negation word or intensity modifier affects only the next word
        (a negation followed by a modifier carries both to the word after).
        Any other word, lexicon hit or not, clears both.

        Returns score from -1 (very negative) to 1 (very positive);
        0.0 when no lexicon word is found.
        """
        # Fold the typographic apostrophe so "isn’t" tokenizes like "isn't".
        normalized = text.lower().replace("\u2019", "'")

        positive_count = 0.0
        negative_count = 0.0
        current_modifier = 1.0
        negation = False

        for token in self._TOKEN_RE.findall(normalized):
            # Check for negation (contractions are matched as whole tokens)
            if token in self.negation_words:
                negation = True
                continue

            # Check for intensity modifiers
            if token in self.intensity_modifiers:
                current_modifier = self.intensity_modifiers[token]
                continue

            # Drop a possessive/contraction suffix ("leader's" -> "leader")
            # so it does not hide a lexicon word.
            word = token.partition("'")[0]

            if word in self.positive_words:
                polarity = 1
            elif word in self.negative_words:
                polarity = -1
            else:
                polarity = 0

            if polarity:
                signed = current_modifier * (-polarity if negation else polarity)
                if signed > 0:
                    positive_count += signed
                elif signed < 0:
                    negative_count -= signed

            # Reset modifiers after applying
            current_modifier = 1.0
            negation = False

        # Calculate final score
        total = positive_count + negative_count
        if total == 0:
            return 0.0

        # The +1 dampens the score so a single hit gives +/-0.5, not +/-1.
        score = (positive_count - negative_count) / (total + 1)
        return max(-1.0, min(1.0, score))  # Clamp to [-1, 1]

    def _get_label(self, score: float) -> str:
        """Convert numeric sentiment to label.

        Thresholds are strict: above 0.3 / below -0.3 is "strongly_*",
        above 0.1 / below -0.1 is bullish / bearish, and everything in
        [-0.1, 0.1] is "neutral".
        """
        if score > 0.3:
            return "strongly_bullish"
        elif score > 0.1:
            return "bullish"
        elif score < -0.3:
            return "strongly_bearish"
        elif score < -0.1:
            return "bearish"
        else:
            return "neutral"

    def _extract_topics(self, text: str) -> List[str]:
        """Extract topics from text.

        A topic is reported when any of its keywords occurs in the
        lower-cased text. Keywords of up to three characters (acronyms such
        as "sec" or "fed") must appear as a whole word; longer keywords
        match as substrings, so "expect" also matches "expectations".

        Returns topic names in the order they are declared in
        ``_TOPIC_KEYWORDS``.
        """
        text_lower = text.lower()
        whole_words = set(re.findall(r"\w+", text_lower))

        topics = []
        for topic, keywords in self._TOPIC_KEYWORDS.items():
            for keyword in keywords:
                if len(keyword) <= self._ACRONYM_MAX_LEN:
                    found = keyword in whole_words
                else:
                    found = keyword in text_lower
                if found:
                    topics.append(topic)
                    break

        return topics

    def _calculate_trend(self, scores: List[float]) -> str:
        """Calculate sentiment trend over time.

        Compares the mean of the first half of ``scores`` with the mean of
        the second half (the extra item of an odd-length list goes to the
        second half). Needs at least 3 scores; otherwise returns
        "insufficient_data".
        """
        if len(scores) < 3:
            return "insufficient_data"

        # Split into early and late
        mid = len(scores) // 2
        early_avg = sum(scores[:mid]) / mid
        late_avg = sum(scores[mid:]) / (len(scores) - mid)

        diff = late_avg - early_avg
        if diff > 0.1:
            return "improving"
        elif diff < -0.1:
            return "deteriorating"
        else:
            return "stable"

    def _get_news(self, symbol: str) -> List[Dict[str, Any]]:
        """
        Get simulated news data for a symbol.
        In production, this would fetch from real news APIs.

        The lookup is case-insensitive on the symbol. Unknown symbols get
        two generic items that embed the symbol exactly as passed in.
        Each item is a dict with "headline", "text" and "source".
        """
        # Simulated news based on symbol
        news_templates = {
            "AAPL": [
                {
                    "headline": "Apple Reports Strong iPhone Sales, Beats Expectations",
                    "text": "Apple reported record quarterly revenue with strong iPhone sales growth exceeding analyst expectations. The company showed significant momentum in services.",
                    "source": "Reuters"
                },
                {
                    "headline": "Apple Faces Regulatory Scrutiny in EU Markets",
                    "text": "European regulators announced investigation into Apple's app store policies. Concerns about competition and uncertainty around potential fines.",
                    "source": "Bloomberg"
                },
                {
                    "headline": "Apple Announces Innovative AI Features for Products",
                    "text": "Apple revealed breakthrough AI capabilities coming to iPhone and Mac, positioning as leader in on-device AI with strong privacy focus.",
                    "source": "TechCrunch"
                }
            ],
            "GOOGL": [
                {
                    "headline": "Google Cloud Shows Strong Growth Momentum",
                    "text": "Alphabet's cloud division posted significant growth, beating expectations and gaining market share against competitors.",
                    "source": "WSJ"
                },
                {
                    "headline": "Google Faces Antitrust Lawsuit Concerns",
                    "text": "DOJ lawsuit creates uncertainty for Google's search business. Analysts express concern about potential restructuring.",
                    "source": "NYTimes"
                }
            ],
            "MSFT": [
                {
                    "headline": "Microsoft AI Strategy Drives Growth",
                    "text": "Microsoft reported strong earnings driven by AI integration across products. Azure growth exceeded expectations with bullish guidance.",
                    "source": "CNBC"
                },
                {
                    "headline": "Microsoft Expands Enterprise AI Partnerships",
                    "text": "Major enterprise deals signal strong demand for Microsoft's AI solutions. Analysts upgrade outlook citing momentum.",
                    "source": "Bloomberg"
                }
            ]
        }

        # Default news for unknown symbols
        default_news = [
            {
                "headline": f"{symbol} Shows Mixed Trading Signals",
                "text": f"{symbol} traded in a volatile range with somewhat mixed signals from technical indicators.",
                "source": "MarketWatch"
            },
            {
                "headline": f"Analysts Maintain Neutral Outlook on {symbol}",
                "text": f"Wall Street analysts maintain neutral ratings on {symbol} amid market uncertainty.",
                "source": "Seeking Alpha"
            }
        ]

        return news_templates.get(symbol.upper(), default_news)