twitter.cpp
1 // src/examples/simdjson/twitter.cpp 2 // 3 // simdjson 4.2.4 demo - SIMD-accelerated JSON parsing at 4+ GB/s 4 // 5 // Demonstrates: 6 // - On-demand parsing (DOM-free, lazy evaluation) 7 // - Nested object traversal 8 // - Array iteration 9 // - Error handling without exceptions 10 // - Performance measurement 11 // 12 // This example parses a mock Twitter API response structure, 13 // extracting user info and tweet text from nested JSON. 14 15 #include <chrono> 16 #include <cstdio> 17 #include <string> 18 #include <string_view> 19 #include <vector> 20 21 #include <simdjson.h> 22 23 namespace straylight::examples { 24 25 // ════════════════════════════════════════════════════════════════════════════════ 26 // Mock Twitter API response - realistic nested structure 27 // ════════════════════════════════════════════════════════════════════════════════ 28 29 constexpr std::string_view kTwitterResponse = R"({ 30 "data": [ 31 { 32 "id": "1445078208190291968", 33 "text": "The matrix has its roots in primitive arcade games", 34 "author_id": "12345", 35 "created_at": "2026-01-26T12:00:00.000Z", 36 "public_metrics": { 37 "retweet_count": 42, 38 "reply_count": 7, 39 "like_count": 256, 40 "quote_count": 3 41 }, 42 "entities": { 43 "hashtags": [ 44 {"start": 0, "end": 6, "tag": "matrix"}, 45 {"start": 30, "end": 36, "tag": "arcade"} 46 ], 47 "urls": [] 48 } 49 }, 50 { 51 "id": "1445078208190291969", 52 "text": "Cyberspace. A consensual hallucination experienced daily by billions", 53 "author_id": "12346", 54 "created_at": "2026-01-26T12:01:00.000Z", 55 "public_metrics": { 56 "retweet_count": 128, 57 "reply_count": 15, 58 "like_count": 512, 59 "quote_count": 8 60 }, 61 "entities": { 62 "hashtags": [ 63 {"start": 0, "end": 10, "tag": "cyberspace"} 64 ], 65 "urls": [ 66 {"start": 50, "end": 73, "url": "https://straylight.ai", "expanded_url": "https://straylight.ai/neuromancer"} 67 ] 68 } 69 }, 70 { 71 "id": "1445078208190291970", 72 "text": "The sky above the port was the color of television, tuned to a dead channel", 73 "author_id": "12347", 74 "created_at": "2026-01-26T12:02:00.000Z", 75 "public_metrics": { 76 "retweet_count": 1024, 77 "reply_count": 89, 78 "like_count": 4096, 79 "quote_count": 42 80 }, 81 "entities": { 82 "hashtags": [], 83 "urls": [] 84 } 85 } 86 ], 87 "includes": { 88 "users": [ 89 {"id": "12345", "name": "Case", "username": "case_cowboy", "verified": false}, 90 {"id": "12346", "name": "Molly", "username": "razorgirl", "verified": true}, 91 {"id": "12347", "name": "Wintermute", "username": "wintermute_ai", "verified": true} 92 ] 93 }, 94 "meta": { 95 "result_count": 3, 96 "newest_id": "1445078208190291970", 97 "oldest_id": "1445078208190291968" 98 } 99 })"; 100 101 // ════════════════════════════════════════════════════════════════════════════════ 102 // Tweet structure for extraction 103 // ════════════════════════════════════════════════════════════════════════════════ 104 105 struct Tweet { 106 std::string id; 107 std::string text; 108 std::string author_id; 109 int64_t likes{0}; 110 int64_t retweets{0}; 111 std::vector<std::string> hashtags; 112 }; 113 114 struct User { 115 std::string id; 116 std::string name; 117 std::string username; 118 bool verified{false}; 119 }; 120 121 // ════════════════════════════════════════════════════════════════════════════════ 122 // Parse tweets using simdjson on-demand API 123 // ════════════════════════════════════════════════════════════════════════════════ 124 125 auto parse_tweets(std::string_view json) -> std::vector<Tweet> { 126 std::vector<Tweet> tweets; 127 128 simdjson::ondemand::parser parser; 129 simdjson::padded_string padded(json); 130 131 auto doc = parser.iterate(padded); 132 133 // Navigate to data array 134 auto data = doc["data"]; 135 136 for (auto tweet_obj : data.get_array()) { 137 Tweet tweet; 138 139 // Extract basic fields - simdjson uses implicit conversion 140 std::string_view id_sv; 141 tweet_obj["id"].get_string().get(id_sv); 142 tweet.id = std::string(id_sv); 143 144 std::string_view text_sv; 145 tweet_obj["text"].get_string().get(text_sv); 146 tweet.text = std::string(text_sv); 147 148 std::string_view author_sv; 149 tweet_obj["author_id"].get_string().get(author_sv); 150 tweet.author_id = std::string(author_sv); 151 152 // Extract metrics 153 auto metrics = tweet_obj["public_metrics"]; 154 metrics["like_count"].get_int64().get(tweet.likes); 155 metrics["retweet_count"].get_int64().get(tweet.retweets); 156 157 // Extract hashtags 158 auto entities = tweet_obj["entities"]; 159 auto hashtags = entities["hashtags"]; 160 for (auto ht : hashtags.get_array()) { 161 std::string_view tag_sv; 162 ht["tag"].get_string().get(tag_sv); 163 tweet.hashtags.push_back(std::string(tag_sv)); 164 } 165 166 tweets.push_back(std::move(tweet)); 167 } 168 169 return tweets; 170 } 171 172 // ════════════════════════════════════════════════════════════════════════════════ 173 // Parse users 174 // ════════════════════════════════════════════════════════════════════════════════ 175 176 auto parse_users(std::string_view json) -> std::vector<User> { 177 std::vector<User> users; 178 179 simdjson::ondemand::parser parser; 180 simdjson::padded_string padded(json); 181 182 auto doc = parser.iterate(padded); 183 auto includes = doc["includes"]; 184 auto users_array = includes["users"]; 185 186 for (auto user_obj : users_array.get_array()) { 187 User user; 188 189 std::string_view sv; 190 user_obj["id"].get_string().get(sv); 191 user.id = std::string(sv); 192 193 user_obj["name"].get_string().get(sv); 194 user.name = std::string(sv); 195 196 user_obj["username"].get_string().get(sv); 197 user.username = std::string(sv); 198 199 user_obj["verified"].get_bool().get(user.verified); 200 201 users.push_back(std::move(user)); 202 } 203 204 return users; 205 } 206 207 // ════════════════════════════════════════════════════════════════════════════════ 208 // Benchmark: parse the same JSON many times 209 // ════════════════════════════════════════════════════════════════════════════════ 210 211 auto benchmark_parsing(std::string_view json, int iterations) -> double { 212 simdjson::ondemand::parser parser; 213 simdjson::padded_string padded(json); 214 215 auto start = std::chrono::high_resolution_clock::now(); 216 217 int64_t total_likes = 0; 218 for (int i = 0; i < iterations; ++i) { 219 auto doc = parser.iterate(padded); 220 auto data = doc["data"]; 221 for (auto tweet : data.get_array()) { 222 auto metrics = tweet["public_metrics"]; 223 int64_t likes = 0; 224 metrics["like_count"].get_int64().get(likes); 225 total_likes += likes; 226 } 227 } 228 229 auto end = std::chrono::high_resolution_clock::now(); 230 auto duration = std::chrono::duration<double, std::milli>(end - start).count(); 231 232 // Prevent optimization from removing the loop 233 if (total_likes == 0) { 234 std::printf("unexpected zero likes\n"); 235 } 236 237 return duration; 238 } 239 240 // ════════════════════════════════════════════════════════════════════════════════ 241 // Main demo 242 // ════════════════════════════════════════════════════════════════════════════════ 243 244 auto implementation() -> int { 245 std::printf("════════════════════════════════════════════════════════════\n"); 246 std::printf(" simdjson %s - SIMD-accelerated JSON parsing\n", SIMDJSON_VERSION); 247 std::printf("════════════════════════════════════════════════════════════\n\n"); 248 249 // Show implementation info 250 std::printf("Implementation: %s\n", simdjson::get_active_implementation()->name().data()); 251 std::printf("Description: %s\n\n", simdjson::get_active_implementation()->description().data()); 252 253 // Parse tweets 254 std::printf("Parsing Twitter API response (%zu bytes)...\n\n", kTwitterResponse.size()); 255 256 auto tweets = parse_tweets(kTwitterResponse); 257 auto users = parse_users(kTwitterResponse); 258 259 // Display results 260 std::printf("Found %zu tweets:\n", tweets.size()); 261 std::printf("────────────────────────────────────────────────────────────\n"); 262 263 for (const auto& tweet : tweets) { 264 // Find author 265 std::string author_name = "unknown"; 266 for (const auto& user : users) { 267 if (user.id == tweet.author_id) { 268 author_name = user.name; 269 if (user.verified) { 270 author_name += " [verified]"; 271 } 272 break; 273 } 274 } 275 276 std::printf("\n@%s:\n", author_name.c_str()); 277 std::printf(" \"%s\"\n", tweet.text.c_str()); 278 std::printf(" likes: %ld retweets: %ld", tweet.likes, tweet.retweets); 279 280 if (!tweet.hashtags.empty()) { 281 std::printf(" tags: "); 282 for (size_t i = 0; i < tweet.hashtags.size(); ++i) { 283 std::printf("#%s", tweet.hashtags[i].c_str()); 284 if (i < tweet.hashtags.size() - 1) 285 std::printf(", "); 286 } 287 } 288 std::printf("\n"); 289 } 290 291 // Benchmark 292 std::printf("\n════════════════════════════════════════════════════════════\n"); 293 std::printf(" Performance benchmark\n"); 294 std::printf("════════════════════════════════════════════════════════════\n\n"); 295 296 constexpr int kIterations = 100000; 297 double ms = benchmark_parsing(kTwitterResponse, kIterations); 298 299 double bytes_processed = static_cast<double>(kTwitterResponse.size()) * kIterations; 300 double gb_per_sec = (bytes_processed / (1024.0 * 1024.0 * 1024.0)) / (ms / 1000.0); 301 302 std::printf("Parsed %d iterations in %.2f ms\n", kIterations, ms); 303 std::printf("Throughput: %.2f GB/s\n", gb_per_sec); 304 std::printf("Per-parse: %.3f microseconds\n\n", (ms * 1000.0) / kIterations); 305 306 std::printf("════════════════════════════════════════════════════════════\n"); 307 std::printf(" simdjson: parsing JSON at the speed of your CPU\n"); 308 std::printf("════════════════════════════════════════════════════════════\n"); 309 310 return 0; 311 } 312 313 } // namespace straylight::examples 314 315 auto main() -> int { 316 return straylight::examples::implementation(); 317 }