/ bulletind / __init__.py
__init__.py
  1  #!/usr/bin/python3
  2  """Retrieve 'bulletin D' data"""
  3  from __future__ import annotations
  4  
  5  import bisect
  6  import datetime
  7  import json
  8  import os
  9  import pathlib
 10  import sys
 11  import typing
 12  import xml.etree.ElementTree
 13  from dataclasses import dataclass, field
 14  from operator import attrgetter
 15  
 16  import bs4
 17  import platformdirs
 18  import requests
 19  from dataclasses_json import DataClassJsonMixin, config
 20  from marshmallow import fields
 21  
 22  # Copyright (C) 2022 Jeff Epler <jepler@gmail.com>
 23  # SPDX-FileCopyrightText: 2022 Jeff Epler
 24  #
 25  # SPDX-License-Identifier: GPL-3.0-only
 26  
# Index page that get_bulletin_d_data() downloads; every tag on it whose
# href contains "xml" is treated as a link to one bulletin.
BULLETIN_D_INDEX = "https://datacenter.iers.org/availableVersions.php?id=17"

# Directories holding bulletins cached as "<name>.json" files.  By default
# cache() looks in each of them and writes newly fetched bulletins to the
# first entry; get_cached_bulletin_d_data() reads from all of them.
DATA_PATHS = [
    platformdirs.user_cache_path(appname="bulletind"),
    pathlib.Path(__file__).resolve().parent / "data",
]
 33  
 34  
 35  @dataclass
 36  class BulletinDInfo(DataClassJsonMixin):
 37      """Type representing a Bulletin D dictionary"""
 38  
 39      date: datetime.date = field(
 40          metadata=config(
 41              encoder=datetime.date.isoformat,
 42              decoder=datetime.date.fromisoformat,
 43              mm_field=fields.DateTime(format="iso"),
 44          )
 45      )
 46      dut1: float
 47      dut1_unit: str
 48      number: int
 49      start_date: datetime.date = field(
 50          metadata=config(
 51              encoder=datetime.date.isoformat,
 52              decoder=datetime.date.fromisoformat,
 53              mm_field=fields.DateTime(format="iso"),
 54          )
 55      )
 56      start_utc: float
 57  
 58  
 59  def cache(
 60      url: str, cache_paths: typing.Optional[list[pathlib.Path]] = None
 61  ) -> BulletinDInfo:
 62      """Download a specific Bulletin & cache it in json format"""
 63      base = url.split("/")[-1].split(".")[0]
 64  
 65      cache_paths = cache_paths or DATA_PATHS
 66      for path in cache_paths:
 67          loc = path / f"{base}.json"
 68          if loc.exists():
 69              with open(loc, "r", encoding="utf-8") as data_file:
 70                  return BulletinDInfo.from_json(data_file.read())
 71  
 72      loc = cache_paths[0] / f"{base}.json"
 73      tmp_loc = cache_paths[0] / f"{base}.json.tmp"
 74  
 75      print(f"Fetching {url} to {loc}", file=sys.stderr)
 76      buld_xml = requests.get(url).text
 77      doc = xml.etree.ElementTree.XML(buld_xml)
 78  
 79      def find_el(element_name: str) -> xml.etree.ElementTree.Element:
 80          element = doc.find(f".//{{http://www.iers.org/2003/schema/iers}}{element_name}")
 81          assert element is not None
 82          return element
 83  
 84      def find(element_name: str) -> str:
 85          element = find_el(element_name)
 86          return element.text or ""
 87  
 88      def as_date(date_str: str) -> datetime.date:
 89          return datetime.date.fromisoformat(date_str)
 90  
 91      data = BulletinDInfo(
 92          date=as_date(find("date")),
 93          start_date=as_date(find("startDate")),
 94          start_utc=float(find("startUTC")),
 95          number=int(find("number")),
 96          dut1=float(find("DUT1")),
 97          dut1_unit=find_el("DUT1").attrib.get("unit", "s"),
 98      )
 99  
100      with open(tmp_loc, "wt", encoding="utf-8") as data_file:
101          print(data.to_json(indent=4), file=data_file)
102          data_file.close()
103          os.rename(tmp_loc, loc)
104          print(data)
105          return data
106  
107  
def get_bulletin_d_data(
    cache_paths: typing.Optional[list[pathlib.Path]] = None,
) -> list[BulletinDInfo]:
    """Download and return all available Bulletin D data

    Fetches the index page at ``BULLETIN_D_INDEX``, collects every tag whose
    ``href`` contains ``"xml"`` and passes each link to ``cache()``, which
    returns the stored copy when one exists and downloads it otherwise.

    ``cache_paths`` is forwarded to ``cache()``; when it is ``None`` or
    empty, ``DATA_PATHS`` is used.  Raises ``requests.RequestException`` if
    the index page cannot be retrieved.
    """
    # Create the directories that will actually be used: the caller's when
    # given, the defaults otherwise.
    for path in cache_paths or DATA_PATHS:
        os.makedirs(path, exist_ok=True)

    # Bound the wait and refuse to scrape links out of an HTTP error page.
    response = requests.get(BULLETIN_D_INDEX, timeout=60)
    response.raise_for_status()
    buld_data = bs4.BeautifulSoup(response.text, features="html.parser")
    # find_all is the current name of the deprecated findAll alias.
    refs = buld_data.find_all(lambda tag: "xml" in tag.get("href", ""))

    return [cache(r["href"], cache_paths) for r in refs]
120  
121  
def get_cached_bulletin_d_data() -> list[BulletinDInfo]:
    """Load every cached Bulletin D record, ordered by start date

    Every ``*.json`` file found directly inside each ``DATA_PATHS``
    directory is decoded into a ``BulletinDInfo``.
    """
    bulletins = [
        BulletinDInfo.from_json(json_file.read_text(encoding="utf-8"))
        for directory in DATA_PATHS
        for json_file in directory.glob("*.json")
    ]
    bulletins.sort(key=attrgetter("start_date"))
    return bulletins
133  
134  
def get_bulletin_d_by_date(date: datetime.date) -> BulletinDInfo | None:
    """Look up the Bulletin D in effect on ``date``

    Returns the cached bulletin with the latest ``start_date`` that is on or
    before ``date``, or ``None`` when ``date`` is earlier than every cached
    bulletin (or nothing is cached).
    """
    bulletins = get_cached_bulletin_d_data()
    start_dates = [bulletin.start_date for bulletin in bulletins]
    # Number of bulletins that have already started on ``date``.
    started = bisect.bisect_right(start_dates, date)
    return bulletins[started - 1] if started else None