// scrape.rs
1 use reqwest::blocking::get; 2 use scraper::{ElementRef, Html, Selector}; 3 4 pub struct Service { 5 pub arrival_time: String, 6 pub departure_time: String, 7 pub origin: String, 8 pub destination: String, 9 pub platform: u8, 10 pub toc: String, 11 } 12 13 fn get_inner_html(selector: &Selector, service: &ElementRef, missing: &str) -> String { 14 let element = service 15 .select(selector) 16 .map(|x| x.inner_html()) 17 .next() 18 .unwrap_or_else(|| String::from(missing)); 19 element 20 } 21 22 fn get_service_list_from_html(document: &Html, missing: &str) -> Vec<Service> { 23 let mut service_list: Vec<Service> = Vec::new(); 24 25 for service in document.select( 26 &Selector::parse("a.service").expect("<a class=\"service\"> does not exist in documents"), 27 ) { 28 let destination = get_inner_html( 29 &Selector::parse("div.location.d>span") 30 .expect("<selector div.location.d>span does not exist in service"), 31 &service, 32 missing, 33 ); 34 let origin = get_inner_html( 35 &Selector::parse("div.location.o>span") 36 .expect("<selector div.location.o>span does not exist in service"), 37 &service, 38 missing, 39 ); 40 let platform_string = get_inner_html( 41 &Selector::parse("div.platform.c.act") 42 .expect("<selector div.platform.c>span does not exist in service"), 43 &service, 44 missing, 45 ); 46 let departure_time = get_inner_html( 47 &Selector::parse("div.time.d.gbtt") 48 .expect("<selector div.time.d.gbtt does not exist in service"), 49 &service, 50 missing, 51 ); 52 let arrival_time = get_inner_html( 53 &Selector::parse("div.time.a.gbtt") 54 .expect("<selector div.time.a.gbtt does not exist in service"), 55 &service, 56 missing, 57 ); 58 let toc = get_inner_html( 59 &Selector::parse("div.toc").expect("<div class=\"\" does not exist in service"), 60 &service, 61 missing, 62 ); 63 let platform = platform_string.parse().unwrap_or(255); 64 65 // if destination == String::new() { 66 // // removes terminating services 67 // continue; 68 // }; 69 70 
service_list.push(Service { 71 arrival_time, 72 departure_time, 73 origin, 74 destination, 75 platform, 76 toc, 77 }); 78 } 79 service_list 80 } 81 82 #[must_use] 83 pub fn get_services(date: &str, station: &str, missing: &str) -> Vec<Service> { 84 //! 85 //! # Panics 86 //! 87 //! will panic if get request fails, or contians no text 88 let url = format!("https://www.realtimetrains.co.uk/search/detailed/gb-nr:{station}/{date}/0000-2359?stp=WVS&show=pax-calls&order=wtt"); 89 let result = get(url) 90 .expect("get request failed") 91 .text() 92 .expect("http response contains no text"); 93 let document = Html::parse_document(&result); 94 95 get_service_list_from_html(&document, missing) 96 } 97 98 pub fn csv_services(date: &String, service_list: &[Service], all_plats: bool) { 99 for service in service_list { 100 if service.platform == 0 || all_plats { 101 println!( 102 "{:<10}, {:<4}, {:<26}, {:<26}, {:<4}, {:<3}, {:<3}", 103 date, 104 service.arrival_time, 105 service.origin, 106 service.destination, 107 service.departure_time, 108 service.platform, 109 service.toc 110 ); 111 } 112 } 113 }