arxiv_reader.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Scrape last week's arXiv listings for several CS categories with a
MetaGPT DataInterpreter agent, which dedupes, filters, and visualizes them."""
from metagpt.roles.di.data_interpreter import DataInterpreter
from metagpt.tools.libs.web_scraping import view_page_element_to_scrape


async def main():
    """Assemble the arXiv 'past week' listing URLs and delegate the whole
    scraping / dedup / filtering / visualization task to a DataInterpreter
    equipped with the page-scraping tool."""
    url_template = "https://arxiv.org/list/{tag}/pastweek?skip=0&show=300"
    # Fixed set of category tags; one listing URL per category.
    categories = ("cs.ai", "cs.cl", "cs.lg", "cs.se")
    listing_urls = [url_template.format(tag=category) for category in categories]
    # Natural-language task handed to the agent; it interpolates the URL list.
    task = f"""This is a collection of arxiv urls: '{listing_urls}' .
Record each article, remove duplicates by title (they may have multiple tags), filter out papers related to
large language model / agent / llm, print top 100 and visualize the word count of the titles"""
    # Tool is passed by name; the agent resolves it from its registry.
    interpreter = DataInterpreter(react_mode="react", tools=[view_page_element_to_scrape.__name__])

    await interpreter.run(task)


if __name__ == "__main__":
    import asyncio

    asyncio.run(main())