arxiv_reader.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Scrape last week's arXiv listings for several CS categories with a
MetaGPT DataInterpreter agent, which dedupes, filters, and visualizes them."""
from metagpt.roles.di.data_interpreter import DataInterpreter
from metagpt.tools.libs.web_scraping import view_page_element_to_scrape


async def main():
    """Assemble the arXiv 'past week' listing URLs and delegate the whole
    scraping / dedup / filtering / visualization task to a DataInterpreter
    equipped with the page-scraping tool."""
    url_template = "https://arxiv.org/list/{tag}/pastweek?skip=0&show=300"
    # Fixed set of category tags; one listing URL per category.
    categories = ("cs.ai", "cs.cl", "cs.lg", "cs.se")
    listing_urls = [url_template.format(tag=category) for category in categories]
    # Natural-language task handed to the agent; it interpolates the URL list.
    task = f"""This is a collection of arxiv urls: '{listing_urls}' .
Record each article, remove duplicates by title (they may have multiple tags), filter out papers related to
large language model / agent / llm, print top 100 and visualize the word count of the titles"""
    # Tool is passed by name; the agent resolves it from its registry.
    interpreter = DataInterpreter(react_mode="react", tools=[view_page_element_to_scrape.__name__])

    await interpreter.run(task)


if __name__ == "__main__":
    import asyncio

    asyncio.run(main())