__init__.py
1 #!/usr/bin/python3 2 """Retrieve 'bulletin D' data""" 3 from __future__ import annotations 4 5 import bisect 6 import datetime 7 import json 8 import os 9 import pathlib 10 import sys 11 import typing 12 import xml.etree.ElementTree 13 from dataclasses import dataclass, field 14 from operator import attrgetter 15 16 import bs4 17 import platformdirs 18 import requests 19 from dataclasses_json import DataClassJsonMixin, config 20 from marshmallow import fields 21 22 # Copyright (C) 2022 Jeff Epler <jepler@gmail.com> 23 # SPDX-FileCopyrightText: 2022 Jeff Epler 24 # 25 # SPDX-License-Identifier: GPL-3.0-only 26 27 BULLETIN_D_INDEX = "https://datacenter.iers.org/availableVersions.php?id=17" 28 29 DATA_PATHS = [ 30 platformdirs.user_cache_path(appname="bulletind"), 31 pathlib.Path(__file__).resolve().parent / "data", 32 ] 33 34 35 @dataclass 36 class BulletinDInfo(DataClassJsonMixin): 37 """Type representing a Bulletin D dictionary""" 38 39 date: datetime.date = field( 40 metadata=config( 41 encoder=datetime.date.isoformat, 42 decoder=datetime.date.fromisoformat, 43 mm_field=fields.DateTime(format="iso"), 44 ) 45 ) 46 dut1: float 47 dut1_unit: str 48 number: int 49 start_date: datetime.date = field( 50 metadata=config( 51 encoder=datetime.date.isoformat, 52 decoder=datetime.date.fromisoformat, 53 mm_field=fields.DateTime(format="iso"), 54 ) 55 ) 56 start_utc: float 57 58 59 def cache( 60 url: str, cache_paths: typing.Optional[list[pathlib.Path]] = None 61 ) -> BulletinDInfo: 62 """Download a specific Bulletin & cache it in json format""" 63 base = url.split("/")[-1].split(".")[0] 64 65 cache_paths = cache_paths or DATA_PATHS 66 for path in cache_paths: 67 loc = path / f"{base}.json" 68 if loc.exists(): 69 with open(loc, "r", encoding="utf-8") as data_file: 70 return BulletinDInfo.from_json(data_file.read()) 71 72 loc = cache_paths[0] / f"{base}.json" 73 tmp_loc = cache_paths[0] / f"{base}.json.tmp" 74 75 print(f"Fetching {url} to {loc}", file=sys.stderr) 76 buld_xml = requests.get(url).text 77 doc = xml.etree.ElementTree.XML(buld_xml) 78 79 def find_el(element_name: str) -> xml.etree.ElementTree.Element: 80 element = doc.find(f".//{{http://www.iers.org/2003/schema/iers}}{element_name}") 81 assert element is not None 82 return element 83 84 def find(element_name: str) -> str: 85 element = find_el(element_name) 86 return element.text or "" 87 88 def as_date(date_str: str) -> datetime.date: 89 return datetime.date.fromisoformat(date_str) 90 91 data = BulletinDInfo( 92 date=as_date(find("date")), 93 start_date=as_date(find("startDate")), 94 start_utc=float(find("startUTC")), 95 number=int(find("number")), 96 dut1=float(find("DUT1")), 97 dut1_unit=find_el("DUT1").attrib.get("unit", "s"), 98 ) 99 100 with open(tmp_loc, "wt", encoding="utf-8") as data_file: 101 print(data.to_json(indent=4), file=data_file) 102 data_file.close() 103 os.rename(tmp_loc, loc) 104 print(data) 105 return data 106 107 108 def get_bulletin_d_data( 109 cache_paths: typing.Optional[list[pathlib.Path]] = None, 110 ) -> list[BulletinDInfo]: 111 """Download and return all available Bulletin D data""" 112 for path in DATA_PATHS: 113 os.makedirs(path, exist_ok=True) 114 115 buld_text = requests.get(BULLETIN_D_INDEX).text 116 buld_data = bs4.BeautifulSoup(buld_text, features="html.parser") 117 refs = buld_data.findAll(lambda tag: "xml" in tag.get("href", "")) 118 119 return [cache(r["href"], cache_paths) for r in refs] 120 121 122 def get_cached_bulletin_d_data() -> list[BulletinDInfo]: 123 """Return all cached Bulletin D data""" 124 125 def content(filename: pathlib.Path) -> BulletinDInfo: 126 with open(filename, "r", encoding="utf-8") as data_file: 127 return BulletinDInfo.from_json(data_file.read()) 128 129 return sorted( 130 (content(p) for path in DATA_PATHS for p in path.glob("*.json")), 131 key=attrgetter("start_date"), 132 ) 133 134 135 def get_bulletin_d_by_date(date: datetime.date) -> BulletinDInfo | None: 136 """Return the Bulletin D effective on the given date""" 137 data = get_cached_bulletin_d_data() 138 idx = bisect.bisect([d.start_date for d in data], date) 139 if idx == 0: # len(data): 140 return None 141 return data[idx - 1]