/ src / scrape.rs
scrape.rs
  1  use reqwest::blocking::get;
  2  use scraper::{ElementRef, Html, Selector};
  3  
  4  pub struct Service {
  5      pub arrival_time: String,
  6      pub departure_time: String,
  7      pub origin: String,
  8      pub destination: String,
  9      pub platform: u8,
 10      pub toc: String,
 11  }
 12  
 13  fn get_inner_html(selector: &Selector, service: &ElementRef, missing: &str) -> String {
 14      let element = service
 15          .select(selector)
 16          .map(|x| x.inner_html())
 17          .next()
 18          .unwrap_or_else(|| String::from(missing));
 19      element
 20  }
 21  
 22  fn get_service_list_from_html(document: &Html, missing: &str) -> Vec<Service> {
 23      let mut service_list: Vec<Service> = Vec::new();
 24  
 25      for service in document.select(
 26          &Selector::parse("a.service").expect("<a class=\"service\"> does not exist in documents"),
 27      ) {
 28          let destination = get_inner_html(
 29              &Selector::parse("div.location.d>span")
 30                  .expect("<selector div.location.d>span does not exist in service"),
 31              &service,
 32              missing,
 33          );
 34          let origin = get_inner_html(
 35              &Selector::parse("div.location.o>span")
 36                  .expect("<selector div.location.o>span does not exist in service"),
 37              &service,
 38              missing,
 39          );
 40          let platform_string = get_inner_html(
 41              &Selector::parse("div.platform.c.act")
 42                  .expect("<selector div.platform.c>span does not exist in service"),
 43              &service,
 44              missing,
 45          );
 46          let departure_time = get_inner_html(
 47              &Selector::parse("div.time.d.gbtt")
 48                  .expect("<selector div.time.d.gbtt does not exist in service"),
 49              &service,
 50              missing,
 51          );
 52          let arrival_time = get_inner_html(
 53              &Selector::parse("div.time.a.gbtt")
 54                  .expect("<selector div.time.a.gbtt does not exist in service"),
 55              &service,
 56              missing,
 57          );
 58          let toc = get_inner_html(
 59              &Selector::parse("div.toc").expect("<div class=\"\" does not exist in service"),
 60              &service,
 61              missing,
 62          );
 63          let platform = platform_string.parse().unwrap_or(255);
 64  
 65          // if destination == String::new() {
 66          //     // removes terminating services
 67          //     continue;
 68          // };
 69  
 70          service_list.push(Service {
 71              arrival_time,
 72              departure_time,
 73              origin,
 74              destination,
 75              platform,
 76              toc,
 77          });
 78      }
 79      service_list
 80  }
 81  
 82  #[must_use]
 83  pub fn get_services(date: &str, station: &str, missing: &str) -> Vec<Service> {
 84      //!
 85      //! # Panics
 86      //!
 87      //! will panic if get request fails, or contians no text
 88      let url = format!("https://www.realtimetrains.co.uk/search/detailed/gb-nr:{station}/{date}/0000-2359?stp=WVS&show=pax-calls&order=wtt");
 89      let result = get(url)
 90          .expect("get request failed")
 91          .text()
 92          .expect("http response contains no text");
 93      let document = Html::parse_document(&result);
 94  
 95      get_service_list_from_html(&document, missing)
 96  }
 97  
 98  pub fn csv_services(date: &String, service_list: &[Service], all_plats: bool) {
 99      for service in service_list {
100          if service.platform == 0 || all_plats {
101              println!(
102                  "{:<10}, {:<4}, {:<26}, {:<26}, {:<4}, {:<3}, {:<3}",
103                  date,
104                  service.arrival_time,
105                  service.origin,
106                  service.destination,
107                  service.departure_time,
108                  service.platform,
109                  service.toc
110              );
111          }
112      }
113  }