/ sentiment_analyzer.py
sentiment_analyzer.py
  1  """
  2  Sentiment Analyzer Module
  3  
  4  NLP-powered market sentiment analysis for financial assets.
  5  """
  6  
  7  import re
  8  from typing import List, Dict, Any, Optional
  9  from collections import defaultdict
 10  
 11  
 12  class SentimentAnalyzer:
 13      """Analyzes market sentiment from various sources."""
 14  
 15      def __init__(self):
 16          """Initialize sentiment analyzer with lexicons."""
 17          # Financial sentiment lexicon
 18          self.positive_words = {
 19              "bullish", "growth", "profit", "gain", "surge", "rally", "upgrade",
 20              "outperform", "buy", "strong", "beat", "exceed", "record", "high",
 21              "momentum", "breakout", "accumulate", "opportunity", "upside",
 22              "recovery", "expansion", "innovative", "leader", "breakthrough"
 23          }
 24  
 25          self.negative_words = {
 26              "bearish", "loss", "decline", "drop", "crash", "sell", "downgrade",
 27              "underperform", "weak", "miss", "below", "low", "risk", "concern",
 28              "warning", "fear", "volatile", "uncertainty", "recession", "debt",
 29              "layoff", "restructure", "lawsuit", "investigation", "fraud"
 30          }
 31  
 32          self.intensity_modifiers = {
 33              "very": 1.5, "extremely": 2.0, "slightly": 0.5, "somewhat": 0.7,
 34              "significantly": 1.8, "substantially": 1.6, "marginally": 0.3
 35          }
 36  
 37          self.negation_words = {
 38              "not", "no", "never", "neither", "nobody", "nothing", "nowhere",
 39              "hardly", "barely", "doesn't", "don't", "won't", "isn't", "aren't"
 40          }
 41  
 42      def analyze(
 43          self,
 44          symbols: List[str],
 45          sources: Optional[List[str]] = None
 46      ) -> Dict[str, Any]:
 47          """
 48          Analyze market sentiment for given symbols.
 49  
 50          Args:
 51              symbols: List of stock symbols to analyze
 52              sources: Optional list of news sources to focus on
 53  
 54          Returns:
 55              Sentiment analysis results
 56          """
 57          results = {}
 58  
 59          for symbol in symbols:
 60              # Get simulated news data for the symbol
 61              news_items = self._get_news(symbol)
 62  
 63              # Analyze each news item
 64              sentiment_scores = []
 65              topics = defaultdict(int)
 66              key_phrases = []
 67  
 68              for item in news_items:
 69                  score = self._analyze_text(item["text"])
 70                  sentiment_scores.append(score)
 71  
 72                  # Extract topics
 73                  extracted_topics = self._extract_topics(item["text"])
 74                  for topic in extracted_topics:
 75                      topics[topic] += 1
 76  
 77                  # Extract key phrases if significant sentiment
 78                  if abs(score) > 0.3:
 79                      key_phrases.append({
 80                          "text": item["headline"],
 81                          "sentiment": round(score, 3),
 82                          "source": item["source"]
 83                      })
 84  
 85              # Aggregate results
 86              avg_sentiment = sum(sentiment_scores) / len(sentiment_scores) if sentiment_scores else 0
 87              sentiment_std = (
 88                  (sum((s - avg_sentiment) ** 2 for s in sentiment_scores) / len(sentiment_scores)) ** 0.5
 89                  if len(sentiment_scores) > 1 else 0
 90              )
 91  
 92              results[symbol] = {
 93                  "overall_sentiment": round(avg_sentiment, 3),
 94                  "sentiment_label": self._get_label(avg_sentiment),
 95                  "confidence": round(1 - min(sentiment_std, 1), 3),
 96                  "news_volume": len(news_items),
 97                  "sentiment_distribution": {
 98                      "positive": sum(1 for s in sentiment_scores if s > 0.1),
 99                      "neutral": sum(1 for s in sentiment_scores if -0.1 <= s <= 0.1),
100                      "negative": sum(1 for s in sentiment_scores if s < -0.1)
101                  },
102                  "top_topics": dict(sorted(topics.items(), key=lambda x: -x[1])[:5]),
103                  "key_phrases": key_phrases[:5],
104                  "trend": self._calculate_trend(sentiment_scores)
105              }
106  
107          # Market-wide sentiment
108          all_scores = []
109          for symbol_data in results.values():
110              all_scores.append(symbol_data["overall_sentiment"])
111  
112          market_sentiment = sum(all_scores) / len(all_scores) if all_scores else 0
113  
114          return {
115              "symbol_sentiment": results,
116              "market_overview": {
117                  "average_sentiment": round(market_sentiment, 3),
118                  "label": self._get_label(market_sentiment),
119                  "bullish_count": sum(1 for r in results.values() if r["overall_sentiment"] > 0.1),
120                  "bearish_count": sum(1 for r in results.values() if r["overall_sentiment"] < -0.1),
121                  "neutral_count": sum(1 for r in results.values() if -0.1 <= r["overall_sentiment"] <= 0.1)
122              }
123          }
124  
125      def _analyze_text(self, text: str) -> float:
126          """
127          Analyze sentiment of a single text.
128  
129          Returns score from -1 (very negative) to 1 (very positive).
130          """
131          text_lower = text.lower()
132          words = re.findall(r'\b\w+\b', text_lower)
133  
134          positive_count = 0
135          negative_count = 0
136          current_modifier = 1.0
137          negation = False
138  
139          for i, word in enumerate(words):
140              # Check for negation
141              if word in self.negation_words:
142                  negation = True
143                  continue
144  
145              # Check for intensity modifiers
146              if word in self.intensity_modifiers:
147                  current_modifier = self.intensity_modifiers[word]
148                  continue
149  
150              # Score positive/negative words
151              if word in self.positive_words:
152                  score = current_modifier * (1 if not negation else -1)
153                  positive_count += max(score, 0)
154                  negative_count += max(-score, 0)
155              elif word in self.negative_words:
156                  score = current_modifier * (-1 if not negation else 1)
157                  negative_count += max(-score, 0)
158                  positive_count += max(score, 0)
159  
160              # Reset modifiers after applying
161              current_modifier = 1.0
162              negation = False
163  
164          # Calculate final score
165          total = positive_count + negative_count
166          if total == 0:
167              return 0.0
168  
169          score = (positive_count - negative_count) / (total + 1)  # Dampened
170          return max(-1, min(1, score))  # Clamp to [-1, 1]
171  
172      def _get_label(self, score: float) -> str:
173          """Convert numeric sentiment to label."""
174          if score > 0.3:
175              return "strongly_bullish"
176          elif score > 0.1:
177              return "bullish"
178          elif score < -0.3:
179              return "strongly_bearish"
180          elif score < -0.1:
181              return "bearish"
182          else:
183              return "neutral"
184  
185      def _extract_topics(self, text: str) -> List[str]:
186          """Extract topics from text."""
187          topic_keywords = {
188              "earnings": ["earnings", "revenue", "profit", "eps", "quarterly"],
189              "guidance": ["guidance", "outlook", "forecast", "expect"],
190              "management": ["ceo", "cfo", "executive", "leadership", "board"],
191              "product": ["product", "launch", "release", "innovation", "feature"],
192              "competition": ["competitor", "market share", "rivalry", "compete"],
193              "regulation": ["sec", "ftc", "regulation", "compliance", "legal"],
194              "macro": ["fed", "interest rate", "inflation", "gdp", "economy"],
195              "insider": ["insider", "buyback", "dividend", "acquisition", "merger"]
196          }
197  
198          text_lower = text.lower()
199          topics = []
200  
201          for topic, keywords in topic_keywords.items():
202              if any(kw in text_lower for kw in keywords):
203                  topics.append(topic)
204  
205          return topics
206  
207      def _calculate_trend(self, scores: List[float]) -> str:
208          """Calculate sentiment trend over time."""
209          if len(scores) < 3:
210              return "insufficient_data"
211  
212          # Split into early and late
213          mid = len(scores) // 2
214          early_avg = sum(scores[:mid]) / mid
215          late_avg = sum(scores[mid:]) / (len(scores) - mid)
216  
217          diff = late_avg - early_avg
218          if diff > 0.1:
219              return "improving"
220          elif diff < -0.1:
221              return "deteriorating"
222          else:
223              return "stable"
224  
225      def _get_news(self, symbol: str) -> List[Dict[str, Any]]:
226          """
227          Get simulated news data for a symbol.
228          In production, this would fetch from real news APIs.
229          """
230          # Simulated news based on symbol
231          news_templates = {
232              "AAPL": [
233                  {
234                      "headline": "Apple Reports Strong iPhone Sales, Beats Expectations",
235                      "text": "Apple reported record quarterly revenue with strong iPhone sales growth exceeding analyst expectations. The company showed significant momentum in services.",
236                      "source": "Reuters"
237                  },
238                  {
239                      "headline": "Apple Faces Regulatory Scrutiny in EU Markets",
240                      "text": "European regulators announced investigation into Apple's app store policies. Concerns about competition and uncertainty around potential fines.",
241                      "source": "Bloomberg"
242                  },
243                  {
244                      "headline": "Apple Announces Innovative AI Features for Products",
245                      "text": "Apple revealed breakthrough AI capabilities coming to iPhone and Mac, positioning as leader in on-device AI with strong privacy focus.",
246                      "source": "TechCrunch"
247                  }
248              ],
249              "GOOGL": [
250                  {
251                      "headline": "Google Cloud Shows Strong Growth Momentum",
252                      "text": "Alphabet's cloud division posted significant growth, beating expectations and gaining market share against competitors.",
253                      "source": "WSJ"
254                  },
255                  {
256                      "headline": "Google Faces Antitrust Lawsuit Concerns",
257                      "text": "DOJ lawsuit creates uncertainty for Google's search business. Analysts express concern about potential restructuring.",
258                      "source": "NYTimes"
259                  }
260              ],
261              "MSFT": [
262                  {
263                      "headline": "Microsoft AI Strategy Drives Growth",
264                      "text": "Microsoft reported strong earnings driven by AI integration across products. Azure growth exceeded expectations with bullish guidance.",
265                      "source": "CNBC"
266                  },
267                  {
268                      "headline": "Microsoft Expands Enterprise AI Partnerships",
269                      "text": "Major enterprise deals signal strong demand for Microsoft's AI solutions. Analysts upgrade outlook citing momentum.",
270                      "source": "Bloomberg"
271                  }
272              ]
273          }
274  
275          # Default news for unknown symbols
276          default_news = [
277              {
278                  "headline": f"{symbol} Shows Mixed Trading Signals",
279                  "text": f"{symbol} traded in a volatile range with somewhat mixed signals from technical indicators.",
280                  "source": "MarketWatch"
281              },
282              {
283                  "headline": f"Analysts Maintain Neutral Outlook on {symbol}",
284                  "text": f"Wall Street analysts maintain neutral ratings on {symbol} amid market uncertainty.",
285                  "source": "Seeking Alpha"
286              }
287          ]
288  
289          return news_templates.get(symbol.upper(), default_news)