# dalle_gpt4v_agent.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Desc : use gpt4v to improve prompt and draw image with dall-e-3

"""set `model: "gpt-4-vision-preview"` in `config2.yaml` first"""

import asyncio

from PIL import Image

from metagpt.actions.action import Action
from metagpt.logs import logger
from metagpt.roles.role import Role
from metagpt.schema import Message
from metagpt.utils.common import encode_image


class GenAndImproveImageAction(Action):
    """Draw an image, let the LLM refine the prompt, redraw, and compare the two results."""

    # When True, `run` saves both generated images as PNG files in the working directory.
    save_image: bool = True

    async def generate_image(self, prompt: str) -> Image.Image:
        """Generate an image for ``prompt`` with the "dall-e-3" model.

        Args:
            prompt: Text description handed to ``self.llm.gen_image``.

        Returns:
            The first image of the sequence returned by ``self.llm.gen_image``.
        """
        imgs = await self.llm.gen_image(model="dall-e-3", prompt=prompt)
        return imgs[0]

    async def refine_prompt(self, old_prompt: str, image: Image.Image) -> str:
        """Ask the LLM for an improved prompt, given the old prompt and the image it produced.

        Args:
            old_prompt: Prompt that was used to generate ``image``.
            image: Image generated from ``old_prompt``.

        Returns:
            The LLM reply, which the message asks to be just the new prompt.
        """
        msg = (
            f"You are a creative painter, with the given generated image and old prompt: {old_prompt}, "
            f"please refine the prompt and generate new one. Just output the new prompt."
        )
        # The image is attached in the encoded form produced by `encode_image`.
        b64_img = encode_image(image)
        new_prompt = await self.llm.aask(msg=msg, images=[b64_img])
        return new_prompt

    async def evaluate_images(self, old_prompt: str, images: list[Image.Image]) -> str:
        """Ask the LLM whether the second image is better than the first for ``old_prompt``.

        Args:
            old_prompt: Prompt the images are judged against.
            images: The images to compare; the message refers to them as "first" and
                "second", so callers are expected to pass exactly two, in that order.

        Returns:
            The raw LLM reply; the message asks for ``True`` or ``False``.
        """
        # The prompt has to be part of the message: the instruction tells the model to
        # judge "with the prompt", which it cannot do unless the prompt text is included.
        msg = (
            f"With the prompt: {old_prompt}, and two generated image, "
            "to judge if the second one is better than the first one. "
            "If so, just output True else output False"
        )
        b64_imgs = [encode_image(img) for img in images]
        res = await self.llm.aask(msg=msg, images=b64_imgs)
        return res

    async def run(self, messages: list[Message]) -> str:
        """Generate, refine, regenerate and evaluate, starting from the latest message.

        Args:
            messages: Message history; the content of the last message is the initial prompt.

        Returns:
            A sentence stating the LLM's verdict on whether the second image is better.
        """
        prompt = messages[-1].content

        old_img: Image.Image = await self.generate_image(prompt)
        new_prompt = await self.refine_prompt(old_prompt=prompt, image=old_img)
        logger.info(f"original prompt: {prompt}")
        logger.info(f"refined prompt: {new_prompt}")
        new_img: Image.Image = await self.generate_image(new_prompt)
        if self.save_image:
            # Fixed, relative file names: a later run overwrites the previous pair.
            old_img.save("./img_by-dall-e_old.png")
            new_img.save("./img_by-dall-e_new.png")
        res = await self.evaluate_images(old_prompt=prompt, images=[old_img, new_img])
        opinion = f"The second generated image is better than the first one: {res}"
        logger.info(f"evaluate opinion: {opinion}")
        return opinion


class Painter(Role):
    """Role whose only action is `GenAndImproveImageAction`."""

    name: str = "MaLiang"
    profile: str = "Painter"
    goal: str = "to generate fine painting"

    def __init__(self, **data):
        """Initialise the role with ``data`` and register its single action."""
        super().__init__(**data)

        self.set_actions([GenAndImproveImageAction])


async def main():
    """Create a `Painter` and run it once with a sample prompt."""
    role = Painter()
    await role.run(with_message="a girl with flowers")


if __name__ == "__main__":
    asyncio.run(main())