Cradicle Explorer

views.py
  1  import datetime
  2  from rfeed import *
  3  from flask import Blueprint, request, render_template, make_response
  4  
  5  import allthethings.utils
  6  
  7  # Note that /blog is not a real path; we do a trick with BlogMiddleware in app.py to rewrite annas-blog.org here.
  8  # For local testing, use http://annas-blog.org.localtest.me:8000/
  9  blog = Blueprint("blog", __name__, template_folder="templates", url_prefix="/blog")
 10  
 11  @blog.get("/")
 12  @allthethings.utils.public_cache(minutes=5, cloudflare_minutes=60*24*7)
 13  def index():
 14      return render_template("blog/index.html")
 15  
 16  @blog.get("/duxiu-exclusive.html")
 17  @allthethings.utils.public_cache(minutes=5, cloudflare_minutes=60*24*7)
 18  def duxiu_exclusive():
 19      return render_template("blog/duxiu-exclusive.html")
 20  @blog.get("/duxiu-exclusive-chinese.html")
 21  @allthethings.utils.public_cache(minutes=5, cloudflare_minutes=60*24*7)
 22  def duxiu_exclusive_chinese():
 23      return render_template("blog/duxiu-exclusive-chinese.html")
 24  @blog.get("/worldcat-scrape.html")
 25  @allthethings.utils.public_cache(minutes=5, cloudflare_minutes=60*24*7)
 26  def worldcat_scrape():
 27      return render_template("blog/worldcat-scrape.html")
 28  @blog.get("/annas-archive-containers.html")
 29  @allthethings.utils.public_cache(minutes=5, cloudflare_minutes=60*24*7)
 30  def aac():
 31      return render_template("blog/annas-archive-containers.html")
 32  @blog.get("/backed-up-the-worlds-largest-comics-shadow-lib.html")
 33  @allthethings.utils.public_cache(minutes=5, cloudflare_minutes=60*24*7)
 34  def comics():
 35      return render_template("blog/backed-up-the-worlds-largest-comics-shadow-lib.html")
 36  @blog.get("/how-to-run-a-shadow-library.html")
 37  @allthethings.utils.public_cache(minutes=5, cloudflare_minutes=60*24*7)
 38  def how_to_run_a_shadow_library():
 39      return render_template("blog/how-to-run-a-shadow-library.html")
 40  @blog.get("/it-how-to-run-a-shadow-library.html")
 41  @allthethings.utils.public_cache(minutes=5, cloudflare_minutes=60*24*7)
 42  def it_how_to_run_a_shadow_library():
 43      return render_template("blog/it-how-to-run-a-shadow-library.html")
 44  @blog.get("/annas-update-open-source-elasticsearch-covers.html")
 45  @allthethings.utils.public_cache(minutes=5, cloudflare_minutes=60*24*7)
 46  def annas_update_open_source_elasticsearch_covers():
 47      return render_template("blog/annas-update-open-source-elasticsearch-covers.html")
 48  @blog.get("/help-seed-zlibrary-on-ipfs.html")
 49  @allthethings.utils.public_cache(minutes=5, cloudflare_minutes=60*24*7)
 50  def help_seed_zlibrary_on_ipfs():
 51      return render_template("blog/help-seed-zlibrary-on-ipfs.html")
 52  @blog.get("/putting-5,998,794-books-on-ipfs.html")
 53  @allthethings.utils.public_cache(minutes=5, cloudflare_minutes=60*24*7)
 54  def putting_5998794_books_on_ipfs():
 55      return render_template("blog/putting-5,998,794-books-on-ipfs.html")
 56  @blog.get("/blog-isbndb-dump-how-many-books-are-preserved-forever.html")
 57  @allthethings.utils.public_cache(minutes=5, cloudflare_minutes=60*24*7)
 58  def blog_isbndb_dump_how_many_books_are_preserved_forever():
 59      return render_template("blog/blog-isbndb-dump-how-many-books-are-preserved-forever.html")
 60  @blog.get("/blog-how-to-become-a-pirate-archivist.html")
 61  @allthethings.utils.public_cache(minutes=5, cloudflare_minutes=60*24*7)
 62  def blog_how_to_become_a_pirate_archivist():
 63      return render_template("blog/blog-how-to-become-a-pirate-archivist.html")
 64  @blog.get("/blog-3x-new-books.html")
 65  @allthethings.utils.public_cache(minutes=5, cloudflare_minutes=60*24*7)
 66  def blog_3x_new_books():
 67      return render_template("blog/blog-3x-new-books.html")
 68  @blog.get("/blog-introducing.html")
 69  @allthethings.utils.public_cache(minutes=5, cloudflare_minutes=60*24*7)
 70  def blog_introducing():
 71      return render_template("blog/blog-introducing.html")
 72  
 73  @blog.get("/rss.xml")
 74  @allthethings.utils.public_cache(minutes=5, cloudflare_minutes=60*24*7)
 75  def rss_xml():
 76      items = [
 77          Item(
 78              title = "Introducing the Pirate Library Mirror: Preserving 7TB of books (that are not in Libgen)",
 79              link = "https://annas-blog.org/blog-introducing.html",
 80              description = "The first library that we have mirrored is Z-Library. This is a popular (and illegal) library.",
 81              author = "Anna and the team",
 82              pubDate = datetime.datetime(2022,7,1),
 83          ),
 84          Item(
 85              title = "3x new books added to the Pirate Library Mirror (+24TB, 3.8 million books)",
 86              link = "https://annas-blog.org/blog-3x-new-books.html",
 87              description = "We have also gone back and scraped some books that we missed the first time around. All in all, this new collection is about 24TB, which is much bigger than the last one (7TB).",
 88              author = "Anna and the team",
 89              pubDate = datetime.datetime(2022,9,25),
 90          ),
 91          Item(
 92              title = "How to become a pirate archivist",
 93              link = "https://annas-blog.org/blog-how-to-become-a-pirate-archivist.html",
 94              description = "The first challenge might be a supriring one. It is not a technical problem, or a legal problem. It is a psychological problem.",
 95              author = "Anna and the team",
 96              pubDate = datetime.datetime(2022,10,17),
 97          ),
 98          Item(
 99              title = "ISBNdb dump, or How Many Books Are Preserved Forever?",
100              link = "https://annas-blog.org/blog-isbndb-dump-how-many-books-are-preserved-forever.html",
101              description = "If we were to properly deduplicate the files from shadow libraries, what percentage of all the books in the world have we preserved?",
102              author = "Anna and the team",
103              pubDate = datetime.datetime(2022,10,31),
104          ),
105          Item(
106              title = "Putting 5,998,794 books on IPFS",
107              link = "https://annas-blog.org/putting-5,998,794-books-on-ipfs.html",
108              description = "Putting dozens of terabytes of data on IPFS is no joke.",
109              author = "Anna and the team",
110              pubDate = datetime.datetime(2022,11,19),
111          ),
112          Item(
113              title = "Help seed Z-Library on IPFS",
114              link = "https://annas-blog.org/help-seed-zlibrary-on-ipfs.html",
115              description = "YOU can help preserve access to this collection.",
116              author = "Anna and the team",
117              pubDate = datetime.datetime(2022,11,22),
118          ),
119          Item(
120              title = "Anna’s Update: fully open source archive, ElasticSearch, 300GB+ of book covers",
121              link = "https://annas-blog.org/annas-update-open-source-elasticsearch-covers.html",
122              description = "We’ve been working around the clock to provide a good alternative with Anna’s Archive. Here are some of the things we achieved recently.",
123              author = "Anna and the team",
124              pubDate = datetime.datetime(2022,12,9),
125          ),
126          Item(
127              title = "How to run a shadow library: operations at Anna’s Archive",
128              link = "https://annas-blog.org/how-to-run-a-shadow-library.html",
129              description = "There is no “AWS for shadow charities”, so how do we run Anna’s Archive?",
130              author = "Anna and the team",
131              pubDate = datetime.datetime(2023,3,19),
132          ),
133          Item(
134              title = "Anna’s Archive has backed up the world’s largest comics shadow library (95TB) — you can help seed it",
135              link = "https://annas-blog.org/backed-up-the-worlds-largest-comics-shadow-lib.html",
136              description = "The largest comic books shadow library in the world had a single point of failure.. until today.",
137              author = "Anna and the team",
138              pubDate = datetime.datetime(2023,5,13),
139          ),
140          Item(
141              title = "Anna’s Archive Containers (AAC): standardizing releases from the world’s largest shadow library",
142              link = "https://annas-blog.org/annas-archive-containers.html",
143              description = "Anna’s Archive has become the largest shadow library in the world, requiring us to standardize our releases.",
144              author = "Anna and the team",
145              pubDate = datetime.datetime(2023,8,15),
146          ),
147          Item(
148              title = "1.3B WorldCat scrape & data science mini-competition",
149              link = "https://annas-blog.org/worldcat-scrape.html",
150              description = "Anna’s Archive scraped all of WorldCat to make a TODO list of books that need to be preserved, and is hosting a data science mini-competition.",
151              author = "Anna and the team",
152              pubDate = datetime.datetime(2023,10,3),
153          ),
154          Item(
155              title = "Exclusive access for LLM companies to largest Chinese non-fiction book collection in the world",
156              link = "https://annas-blog.org/duxiu-exclusive.html",
157              description = "Anna’s Archive acquired a unique collection of 7.5 million / 350TB Chinese non-fiction books — larger than Library Genesis. We’re willing to give an LLM company exclusive access, in exchange for high-quality OCR and text extraction.",
158              author = "Anna and the team",
159              pubDate = datetime.datetime(2023,11,4),
160          ),
161      ]
162  
163      feed = Feed(
164          title = "Anna’s Blog",
165          link = "https://annas-blog.org/",
166          description = "Hi, I’m Anna. I created Anna’s Archive. This is my personal blog, in which I and my teammates write about piracy, digital preservation, and more.",
167          language = "en-US",
168          lastBuildDate = datetime.datetime.now(),
169          items = items,
170      )
171       
172      response = make_response(feed.rss())
173      response.headers['Content-Type'] = 'application/rss+xml; charset=utf-8'
174      return response