/ examples / dalle_gpt4v_agent.py
dalle_gpt4v_agent.py
 1  #!/usr/bin/env python
 2  # -*- coding: utf-8 -*-
 3  # @Desc   : use gpt4v to improve prompt and draw image with dall-e-3
 4  
 5  """set `model: "gpt-4-vision-preview"` in `config2.yaml` first"""
 6  
 7  import asyncio
 8  
 9  from PIL import Image
10  
11  from metagpt.actions.action import Action
12  from metagpt.logs import logger
13  from metagpt.roles.role import Role
14  from metagpt.schema import Message
15  from metagpt.utils.common import encode_image
16  
17  
class GenAndImproveImageAction(Action):
    """Draw an image, refine its prompt with a vision-capable LLM, redraw, and compare.

    `run` performs four LLM calls in sequence: generate an image from the
    incoming prompt, ask for an improved prompt given that image, generate a
    second image from the improved prompt, and finally ask whether the second
    image is better than the first.
    """

    # When True, `run` saves both generated images as PNG files using paths
    # relative to the current working directory.
    save_image: bool = True

    async def generate_image(self, prompt: str) -> Image.Image:
        """Request an image for `prompt` from the "dall-e-3" model.

        Returns:
            The first element of the list returned by `self.llm.gen_image`.

        Raises:
            IndexError: if the image list returned by the LLM is empty.
        """
        imgs = await self.llm.gen_image(model="dall-e-3", prompt=prompt)
        return imgs[0]

    async def refine_prompt(self, old_prompt: str, image: Image.Image) -> str:
        """Ask the LLM for an improved prompt, given the old prompt and the image it produced.

        Args:
            old_prompt: the prompt that was used to generate `image`.
            image: the image generated from `old_prompt`.

        Returns:
            The LLM's reply, which is requested to be only the new prompt.
        """
        msg = (
            f"You are a creative painter, with the given generated image and old prompt: {old_prompt}, "
            "please refine the prompt and generate new one. Just output the new prompt."
        )
        # presumably encode_image yields a base64 string suitable for `images=` — confirm in metagpt.utils.common
        b64_img = encode_image(image)
        return await self.llm.aask(msg=msg, images=[b64_img])

    async def evaluate_images(self, old_prompt: str, images: list[Image.Image]) -> str:
        """Ask the LLM whether the second image is better than the first for `old_prompt`.

        Args:
            old_prompt: the original prompt the images are judged against.
            images: the images to compare, in order (first, second).

        Returns:
            The raw LLM reply; the message asks for "True" or "False", but the
            reply is returned unparsed.
        """
        # The instruction refers to "the prompt", so the prompt text must actually
        # be part of the message; otherwise the model judges without it.
        msg = (
            f"With the prompt: {old_prompt} and two generated image, "
            "to judge if the second one is better than the first one. "
            "If so, just output True else output False"
        )
        b64_imgs = [encode_image(img) for img in images]
        return await self.llm.aask(msg=msg, images=b64_imgs)

    async def run(self, messages: list[Message]) -> str:
        """Run the generate -> refine -> regenerate -> evaluate pipeline.

        Args:
            messages: message history; the content of the last message is used
                as the initial image prompt.

        Returns:
            A sentence embedding the LLM's verdict on whether the second image
            is better than the first.

        Raises:
            IndexError: if `messages` is empty.
        """
        prompt = messages[-1].content

        old_img: Image.Image = await self.generate_image(prompt)
        new_prompt = await self.refine_prompt(old_prompt=prompt, image=old_img)
        logger.info(f"original prompt: {prompt}")
        logger.info(f"refined prompt: {new_prompt}")
        new_img: Image.Image = await self.generate_image(new_prompt)
        if self.save_image:
            old_img.save("./img_by-dall-e_old.png")
            new_img.save("./img_by-dall-e_new.png")
        res = await self.evaluate_images(old_prompt=prompt, images=[old_img, new_img])
        opinion = f"The second generated image is better than the first one: {res}"
        logger.info(f"evaluate opinion: {opinion}")
        return opinion
58  
59  
class Painter(Role):
    """Role whose only registered action is `GenAndImproveImageAction`."""

    # Default identity fields for this role.
    name: str = "MaLiang"
    profile: str = "Painter"
    goal: str = "to generate fine painting"

    def __init__(self, **kwargs):
        """Initialise the base role with `kwargs`, then register the action."""
        super().__init__(**kwargs)

        # Actions are registered after base initialisation has completed.
        actions = [GenAndImproveImageAction]
        self.set_actions(actions)
69  
70  
async def main():
    """Create a `Painter` and run it once on a fixed example prompt."""
    example_prompt = "a girl with flowers"
    painter = Painter()
    await painter.run(with_message=example_prompt)


if __name__ == "__main__":
    # Script entry point: run the `main` coroutine to completion.
    asyncio.run(main())