llm.py
# Copyright (c) 2024-2026 Tencent Zhuque Lab. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requirement: Any integration or derivative work must explicitly attribute
# Tencent Zhuque Lab (https://github.com/Tencent/AI-Infra-Guard) in its
# documentation or user interface, as detailed in the NOTICE file.

import time

import openai

from utils.loging import logger


class LLM:
    """Thin wrapper around an OpenAI-compatible chat-completions endpoint.

    Streams responses, accumulates the text deltas into a single string,
    and retries (with a short sleep) when the model returns an empty
    answer or the underlying API call fails.
    """

    def __init__(
        self,
        model: str,
        api_key: str,
        base_url: str,
        context_window: int | None = None,
    ):
        """Create a client bound to one model/endpoint.

        Args:
            model: Model identifier passed on every completion request.
            api_key: API key for the endpoint.
            base_url: Base URL of the OpenAI-compatible service.
            context_window: Optional context size used to estimate the
                compression threshold; kept locally because the API does
                not return model specs dynamically.
        """
        self.model = model
        self.api_key = api_key
        self.base_url = base_url
        # 60s request timeout guards against a hung streaming connection.
        self.client = openai.OpenAI(api_key=self.api_key, base_url=self.base_url, timeout=60)
        self.temperature = 0.7
        # Used to estimate the compression threshold; does not rely on the
        # API returning model specs dynamically.
        self.context_window = context_window

    def chat(self, message: list[dict], p=False, ret_usage=False) -> str | tuple[str, dict]:
        """Send a chat request, retrying on empty output or API failure.

        Args:
            message: List of chat messages (role/content dicts).
            p: When True, print the final answer to stdout.
            ret_usage: When True, return ``(text, usage_dict)`` instead of
                just the text.

        Returns:
            The accumulated response text, optionally paired with the
            normalized token-usage dict (which may be ``None``).
        """
        ret = ""
        usage = None
        retry = 0

        while True:
            try:
                ret, usage = self.chat_stream(message)
            except Exception:
                # Fix: transient API/network errors previously propagated and
                # skipped the retry loop entirely. Treat a raised call exactly
                # like an empty response so it is retried the same way.
                logger.exception("LLM chat_stream raised, will retry")
                ret, usage = "", None
            if ret != "":
                break
            else:
                retry += 1
                logger.error(f"LLM chat error, retry {retry}")
                time.sleep(1.3)
                # First attempt + 5 retries before giving up.
                if retry > 5:
                    logger.error("LLM chat error, retry 5 times, exit")
                    ret = "连接LLM失败,已重试5次,模型输出为空,请等待1分钟后再试"
                    break
        if p:
            print(ret)

        if ret_usage:
            return ret, usage
        return ret

    def chat_stream(self, message: list[dict]) -> tuple[str, dict]:
        """Stream one completion and collect the full text plus usage.

        Args:
            message: List of chat messages (role/content dicts).

        Returns:
            Tuple of (accumulated response text, normalized usage dict or
            ``None`` if the server never reported usage).
        """
        response = self.client.chat.completions.create(
            model=self.model,
            messages=message,
            temperature=self.temperature,
            stream=True,
            # usage is generally delivered in the final chunk of the stream;
            # earlier chunks usually carry no usage data.
            stream_options={"include_usage": True},
        )

        ret = ""
        usage = None

        for chunk in response:
            _usage = getattr(chunk, "usage", None)
            if _usage:
                usage = self._normalize_usage(_usage)

            choices = getattr(chunk, "choices", None)

            # Ensure choices is a non-empty list (usage-only chunks have none).
            if not isinstance(choices, list) or not choices:
                continue
            choice = choices[0]

            delta = getattr(choice, "delta", None)
            if not delta:
                continue

            content = getattr(delta, "content", None)
            if content:
                ret += content

        return ret, usage

    def _normalize_usage(self, usage) -> dict | None:
        """Convert an SDK usage object into a plain dict (or ``None``).

        Missing attributes map to ``None`` so callers get a stable shape
        regardless of which fields the provider populates.
        """
        if not usage:
            return None

        return {
            "prompt_tokens": getattr(usage, "prompt_tokens", None),
            "completion_tokens": getattr(usage, "completion_tokens", None),
            "total_tokens": getattr(usage, "total_tokens", None),
        }