llm.py
# Copyright (c) 2024-2026 Tencent Zhuque Lab. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requirement: Any integration or derivative work must explicitly attribute
# Tencent Zhuque Lab (https://github.com/Tencent/AI-Infra-Guard) in its
# documentation or user interface, as detailed in the NOTICE file.

import time

import openai

from utils.loging import logger


class LLM:
    """Thin wrapper around an OpenAI-compatible chat-completions endpoint.

    Streams responses, accumulates the text deltas into a single string,
    and retries (with a short sleep) when the model returns an empty
    answer or the underlying API call fails.
    """

    def __init__(
        self,
        model: str,
        api_key: str,
        base_url: str,
        context_window: int | None = None,
    ):
        """Create a client bound to one model/endpoint.

        Args:
            model: Model identifier passed on every completion request.
            api_key: API key for the endpoint.
            base_url: Base URL of the OpenAI-compatible service.
            context_window: Optional context size used to estimate the
                compression threshold; kept locally because the API does
                not return model specs dynamically.
        """
        self.model = model
        self.api_key = api_key
        self.base_url = base_url
        # 60s request timeout guards against a hung streaming connection.
        self.client = openai.OpenAI(api_key=self.api_key, base_url=self.base_url, timeout=60)
        self.temperature = 0.7
        # Used to estimate the compression threshold; does not rely on the
        # API returning model specs dynamically.
        self.context_window = context_window

    def chat(self, message: list[dict], p=False, ret_usage=False) -> str | tuple[str, dict]:
        """Send a chat request, retrying on empty output or API failure.

        Args:
            message: List of chat messages (role/content dicts).
            p: When True, print the final answer to stdout.
            ret_usage: When True, return ``(text, usage_dict)`` instead of
                just the text.

        Returns:
            The accumulated response text, optionally paired with the
            normalized token-usage dict (which may be ``None``).
        """
        ret = ""
        usage = None
        retry = 0

        while True:
            try:
                ret, usage = self.chat_stream(message)
            except Exception:
                # Fix: transient API/network errors previously propagated and
                # skipped the retry loop entirely. Treat a raised call exactly
                # like an empty response so it is retried the same way.
                logger.exception("LLM chat_stream raised, will retry")
                ret, usage = "", None
            if ret != "":
                break
            else:
                retry += 1
                logger.error(f"LLM chat error, retry {retry}")
                time.sleep(1.3)
                # First attempt + 5 retries before giving up.
                if retry > 5:
                    logger.error("LLM chat error, retry 5 times, exit")
                    ret = "连接LLM失败,已重试5次,模型输出为空,请等待1分钟后再试"
                    break
        if p:
            print(ret)

        if ret_usage:
            return ret, usage
        return ret

    def chat_stream(self, message: list[dict]) -> tuple[str, dict]:
        """Stream one completion and collect the full text plus usage.

        Args:
            message: List of chat messages (role/content dicts).

        Returns:
            Tuple of (accumulated response text, normalized usage dict or
            ``None`` if the server never reported usage).
        """
        response = self.client.chat.completions.create(
            model=self.model,
            messages=message,
            temperature=self.temperature,
            stream=True,
            # usage is generally delivered in the final chunk of the stream;
            # earlier chunks usually carry no usage data.
            stream_options={"include_usage": True},
        )

        ret = ""
        usage = None

        for chunk in response:
            _usage = getattr(chunk, "usage", None)
            if _usage:
                usage = self._normalize_usage(_usage)

            choices = getattr(chunk, "choices", None)

            # Ensure choices is a non-empty list (usage-only chunks have none).
            if not isinstance(choices, list) or not choices:
                continue
            choice = choices[0]

            delta = getattr(choice, "delta", None)
            if not delta:
                continue

            content = getattr(delta, "content", None)
            if content:
                ret += content

        return ret, usage

    def _normalize_usage(self, usage) -> dict | None:
        """Convert an SDK usage object into a plain dict (or ``None``).

        Missing attributes map to ``None`` so callers get a stable shape
        regardless of which fields the provider populates.
        """
        if not usage:
            return None

        return {
            "prompt_tokens": getattr(usage, "prompt_tokens", None),
            "completion_tokens": getattr(usage, "completion_tokens", None),
            "total_tokens": getattr(usage, "total_tokens", None),
        }