/ examples / rag / omniparse.py
omniparse.py
 1  import asyncio
 2  
 3  from metagpt.config2 import config
 4  from metagpt.const import EXAMPLE_DATA_PATH
 5  from metagpt.logs import logger
 6  from metagpt.rag.parsers import OmniParse
 7  from metagpt.rag.schema import OmniParseOptions, OmniParseType, ParseResultType
 8  from metagpt.utils.omniparse_client import OmniParseClient
 9  
# Sample inputs used by the examples in this file, one per media kind
# (docx, pdf, video, audio), all located under EXAMPLE_DATA_PATH/omniparse.
TEST_DOCX = EXAMPLE_DATA_PATH / "omniparse/test01.docx"
TEST_PDF = EXAMPLE_DATA_PATH / "omniparse/test02.pdf"
TEST_VIDEO = EXAMPLE_DATA_PATH / "omniparse/test03.mp4"
TEST_AUDIO = EXAMPLE_DATA_PATH / "omniparse/test04.mp3"
14  
15  
async def omniparse_client_example():
    """Call each ``OmniParseClient`` parse method on a sample file and log the result.

    The docx sample is sent as raw bytes (together with a ``bytes_filename``),
    while the pdf, video and audio samples are passed by path, so both input
    forms are shown.
    """
    omni_client = OmniParseClient(base_url=config.omniparse.base_url)

    # docx: read the whole file up front and hand the client the raw bytes.
    with open(TEST_DOCX, "rb") as docx_file:
        docx_bytes = docx_file.read()
    logger.info(await omni_client.parse_document(file_input=docx_bytes, bytes_filename="test_01.docx"))

    # pdf
    logger.info(await omni_client.parse_pdf(file_input=TEST_PDF))

    # video
    logger.info(await omni_client.parse_video(file_input=TEST_VIDEO))

    # audio
    logger.info(await omni_client.parse_audio(file_input=TEST_AUDIO))
36  
37  
async def omniparse_example():
    """Drive the ``OmniParse`` parser through both of its loader entry points.

    A parser configured for PDF input (markdown result type) first loads a
    single PDF via ``load_data``, which is called without ``await``. Its
    ``parse_type`` is then set to ``OmniParseType.DOCUMENT`` and a docx/pdf
    pair is loaded via the awaited ``aload_data``. Each result is logged.
    """
    options = OmniParseOptions(
        parse_type=OmniParseType.PDF,
        result_type=ParseResultType.MD,
        max_timeout=120,
        num_workers=3,
    )
    omni_parser = OmniParse(
        api_key=config.omniparse.api_key,
        base_url=config.omniparse.base_url,
        parse_options=options,
    )

    # One file through the non-awaited loader.
    logger.info(omni_parser.load_data(file_path=TEST_PDF))

    # Several files in one call through the awaitable loader, after
    # changing the parser's parse type.
    omni_parser.parse_type = OmniParseType.DOCUMENT
    logger.info(await omni_parser.aload_data(file_path=[TEST_DOCX, TEST_PDF]))
56  
57  
async def main():
    """Run the client example and then the parser example, one after the other."""
    for run_example in (omniparse_client_example, omniparse_example):
        await run_example()
61  
62  
# Run the examples only when this file is executed as a script; asyncio.run
# drives the main() coroutine to completion.
if __name__ == "__main__":
    asyncio.run(main())