/ src / examples / simdjson / twitter.cpp
twitter.cpp
  1  // src/examples/simdjson/twitter.cpp
  2  //
  3  // simdjson 4.2.4 demo - SIMD-accelerated JSON parsing at 4+ GB/s
  4  //
  5  // Demonstrates:
  6  //   - On-demand parsing (DOM-free, lazy evaluation)
  7  //   - Nested object traversal
  8  //   - Array iteration
  9  //   - Error handling without exceptions
 10  //   - Performance measurement
 11  //
 12  // This example parses a mock Twitter API response structure,
 13  // extracting user info and tweet text from nested JSON.
 14  
 15  #include <chrono>
 16  #include <cstdio>
 17  #include <string>
 18  #include <string_view>
 19  #include <vector>
 20  
 21  #include <simdjson.h>
 22  
 23  namespace straylight::examples {
 24  
 25  // ════════════════════════════════════════════════════════════════════════════════
 26  // Mock Twitter API response - realistic nested structure
 27  // ════════════════════════════════════════════════════════════════════════════════
 28  
// Embedded sample document that implementation() passes to the parsers and to
// the benchmark. It holds three tweets under "data", the three matching
// authors under "includes.users", and a "meta" summary object. Each tweet's
// "author_id" equals the "id" of exactly one entry in "includes.users".
constexpr std::string_view kTwitterResponse = R"({
  "data": [
    {
      "id": "1445078208190291968",
      "text": "The matrix has its roots in primitive arcade games",
      "author_id": "12345",
      "created_at": "2026-01-26T12:00:00.000Z",
      "public_metrics": {
        "retweet_count": 42,
        "reply_count": 7,
        "like_count": 256,
        "quote_count": 3
      },
      "entities": {
        "hashtags": [
          {"start": 0, "end": 6, "tag": "matrix"},
          {"start": 30, "end": 36, "tag": "arcade"}
        ],
        "urls": []
      }
    },
    {
      "id": "1445078208190291969",
      "text": "Cyberspace. A consensual hallucination experienced daily by billions",
      "author_id": "12346",
      "created_at": "2026-01-26T12:01:00.000Z",
      "public_metrics": {
        "retweet_count": 128,
        "reply_count": 15,
        "like_count": 512,
        "quote_count": 8
      },
      "entities": {
        "hashtags": [
          {"start": 0, "end": 10, "tag": "cyberspace"}
        ],
        "urls": [
          {"start": 50, "end": 73, "url": "https://straylight.ai", "expanded_url": "https://straylight.ai/neuromancer"}
        ]
      }
    },
    {
      "id": "1445078208190291970",
      "text": "The sky above the port was the color of television, tuned to a dead channel",
      "author_id": "12347",
      "created_at": "2026-01-26T12:02:00.000Z",
      "public_metrics": {
        "retweet_count": 1024,
        "reply_count": 89,
        "like_count": 4096,
        "quote_count": 42
      },
      "entities": {
        "hashtags": [],
        "urls": []
      }
    }
  ],
  "includes": {
    "users": [
      {"id": "12345", "name": "Case", "username": "case_cowboy", "verified": false},
      {"id": "12346", "name": "Molly", "username": "razorgirl", "verified": true},
      {"id": "12347", "name": "Wintermute", "username": "wintermute_ai", "verified": true}
    ]
  },
  "meta": {
    "result_count": 3,
    "newest_id": "1445078208190291970",
    "oldest_id": "1445078208190291968"
  }
})";
100  
101  // ════════════════════════════════════════════════════════════════════════════════
102  // Tweet structure for extraction
103  // ════════════════════════════════════════════════════════════════════════════════
104  
// One tweet as extracted from an element of the response's "data" array.
// Member order is part of the type's aggregate-initialization interface.
struct Tweet {
  std::string id;                     // "id", kept as text
  std::string text;                   // "text"
  std::string author_id;              // "author_id"; matched against User::id
  int64_t likes = 0;                  // "public_metrics"."like_count"
  int64_t retweets = 0;               // "public_metrics"."retweet_count"
  std::vector<std::string> hashtags;  // "tag" of each "entities"."hashtags" entry
};
113  
// One author as extracted from an element of "includes"."users".
// Member order is part of the type's aggregate-initialization interface.
struct User {
  std::string id;         // "id", kept as text
  std::string name;       // "name" (display name)
  std::string username;   // "username" (handle)
  bool verified = false;  // "verified"
};
120  
121  // ════════════════════════════════════════════════════════════════════════════════
122  // Parse tweets using simdjson on-demand API
123  // ════════════════════════════════════════════════════════════════════════════════
124  
125  auto parse_tweets(std::string_view json) -> std::vector<Tweet> {
126    std::vector<Tweet> tweets;
127  
128    simdjson::ondemand::parser parser;
129    simdjson::padded_string padded(json);
130  
131    auto doc = parser.iterate(padded);
132  
133    // Navigate to data array
134    auto data = doc["data"];
135  
136    for (auto tweet_obj : data.get_array()) {
137      Tweet tweet;
138  
139      // Extract basic fields - simdjson uses implicit conversion
140      std::string_view id_sv;
141      tweet_obj["id"].get_string().get(id_sv);
142      tweet.id = std::string(id_sv);
143  
144      std::string_view text_sv;
145      tweet_obj["text"].get_string().get(text_sv);
146      tweet.text = std::string(text_sv);
147  
148      std::string_view author_sv;
149      tweet_obj["author_id"].get_string().get(author_sv);
150      tweet.author_id = std::string(author_sv);
151  
152      // Extract metrics
153      auto metrics = tweet_obj["public_metrics"];
154      metrics["like_count"].get_int64().get(tweet.likes);
155      metrics["retweet_count"].get_int64().get(tweet.retweets);
156  
157      // Extract hashtags
158      auto entities = tweet_obj["entities"];
159      auto hashtags = entities["hashtags"];
160      for (auto ht : hashtags.get_array()) {
161        std::string_view tag_sv;
162        ht["tag"].get_string().get(tag_sv);
163        tweet.hashtags.push_back(std::string(tag_sv));
164      }
165  
166      tweets.push_back(std::move(tweet));
167    }
168  
169    return tweets;
170  }
171  
172  // ════════════════════════════════════════════════════════════════════════════════
173  // Parse users
174  // ════════════════════════════════════════════════════════════════════════════════
175  
176  auto parse_users(std::string_view json) -> std::vector<User> {
177    std::vector<User> users;
178  
179    simdjson::ondemand::parser parser;
180    simdjson::padded_string padded(json);
181  
182    auto doc = parser.iterate(padded);
183    auto includes = doc["includes"];
184    auto users_array = includes["users"];
185  
186    for (auto user_obj : users_array.get_array()) {
187      User user;
188  
189      std::string_view sv;
190      user_obj["id"].get_string().get(sv);
191      user.id = std::string(sv);
192  
193      user_obj["name"].get_string().get(sv);
194      user.name = std::string(sv);
195  
196      user_obj["username"].get_string().get(sv);
197      user.username = std::string(sv);
198  
199      user_obj["verified"].get_bool().get(user.verified);
200  
201      users.push_back(std::move(user));
202    }
203  
204    return users;
205  }
206  
207  // ════════════════════════════════════════════════════════════════════════════════
208  // Benchmark: parse the same JSON many times
209  // ════════════════════════════════════════════════════════════════════════════════
210  
211  auto benchmark_parsing(std::string_view json, int iterations) -> double {
212    simdjson::ondemand::parser parser;
213    simdjson::padded_string padded(json);
214  
215    auto start = std::chrono::high_resolution_clock::now();
216  
217    int64_t total_likes = 0;
218    for (int i = 0; i < iterations; ++i) {
219      auto doc = parser.iterate(padded);
220      auto data = doc["data"];
221      for (auto tweet : data.get_array()) {
222        auto metrics = tweet["public_metrics"];
223        int64_t likes = 0;
224        metrics["like_count"].get_int64().get(likes);
225        total_likes += likes;
226      }
227    }
228  
229    auto end = std::chrono::high_resolution_clock::now();
230    auto duration = std::chrono::duration<double, std::milli>(end - start).count();
231  
232    // Prevent optimization from removing the loop
233    if (total_likes == 0) {
234      std::printf("unexpected zero likes\n");
235    }
236  
237    return duration;
238  }
239  
240  // ════════════════════════════════════════════════════════════════════════════════
241  // Main demo
242  // ════════════════════════════════════════════════════════════════════════════════
243  
244  auto implementation() -> int {
245    std::printf("════════════════════════════════════════════════════════════\n");
246    std::printf("  simdjson %s - SIMD-accelerated JSON parsing\n", SIMDJSON_VERSION);
247    std::printf("════════════════════════════════════════════════════════════\n\n");
248  
249    // Show implementation info
250    std::printf("Implementation: %s\n", simdjson::get_active_implementation()->name().data());
251    std::printf("Description: %s\n\n", simdjson::get_active_implementation()->description().data());
252  
253    // Parse tweets
254    std::printf("Parsing Twitter API response (%zu bytes)...\n\n", kTwitterResponse.size());
255  
256    auto tweets = parse_tweets(kTwitterResponse);
257    auto users = parse_users(kTwitterResponse);
258  
259    // Display results
260    std::printf("Found %zu tweets:\n", tweets.size());
261    std::printf("────────────────────────────────────────────────────────────\n");
262  
263    for (const auto& tweet : tweets) {
264      // Find author
265      std::string author_name = "unknown";
266      for (const auto& user : users) {
267        if (user.id == tweet.author_id) {
268          author_name = user.name;
269          if (user.verified) {
270            author_name += " [verified]";
271          }
272          break;
273        }
274      }
275  
276      std::printf("\n@%s:\n", author_name.c_str());
277      std::printf("  \"%s\"\n", tweet.text.c_str());
278      std::printf("  likes: %ld  retweets: %ld", tweet.likes, tweet.retweets);
279  
280      if (!tweet.hashtags.empty()) {
281        std::printf("  tags: ");
282        for (size_t i = 0; i < tweet.hashtags.size(); ++i) {
283          std::printf("#%s", tweet.hashtags[i].c_str());
284          if (i < tweet.hashtags.size() - 1)
285            std::printf(", ");
286        }
287      }
288      std::printf("\n");
289    }
290  
291    // Benchmark
292    std::printf("\n════════════════════════════════════════════════════════════\n");
293    std::printf("  Performance benchmark\n");
294    std::printf("════════════════════════════════════════════════════════════\n\n");
295  
296    constexpr int kIterations = 100000;
297    double ms = benchmark_parsing(kTwitterResponse, kIterations);
298  
299    double bytes_processed = static_cast<double>(kTwitterResponse.size()) * kIterations;
300    double gb_per_sec = (bytes_processed / (1024.0 * 1024.0 * 1024.0)) / (ms / 1000.0);
301  
302    std::printf("Parsed %d iterations in %.2f ms\n", kIterations, ms);
303    std::printf("Throughput: %.2f GB/s\n", gb_per_sec);
304    std::printf("Per-parse: %.3f microseconds\n\n", (ms * 1000.0) / kIterations);
305  
306    std::printf("════════════════════════════════════════════════════════════\n");
307    std::printf("  simdjson: parsing JSON at the speed of your CPU\n");
308    std::printf("════════════════════════════════════════════════════════════\n");
309  
310    return 0;
311  }
312  
313  } // namespace straylight::examples
314  
315  auto main() -> int {
316    return straylight::examples::implementation();
317  }