/ doc / MIDDLEWARE.md
MIDDLEWARE.md
  1  # Middleware 系统文档
  2  
  3  beamai_agent 的 Middleware 系统提供了一种灵活的方式来拦截、修改和控制 Agent 执行的各个阶段。
  4  
  5  ## 目录
  6  
  7  - [概述](#概述)
  8  - [生命周期钩子](#生命周期钩子)
  9  - [内置 Middleware](#内置-middleware)
 10  - [预设配置](#预设配置)
 11  - [自定义 Middleware](#自定义-middleware)
 12  - [配置和使用](#配置和使用)
 13  - [高级用法](#高级用法)
 14  
 15  ---
 16  
 17  ## 概述
 18  
  19  Middleware 是 Agent 执行过程中的拦截器,可以:
 20  
 21  - **修改输入/输出**: 在 LLM 调用前后修改消息
 22  - **控制流程**: 跳过、重试或中止执行
 23  - **添加功能**: 日志记录、监控、人工审批等
 24  - **实施限制**: 调用次数限制、Token 限制等
 25  
 26  ### 架构图
 27  
 28  ```
 29  ┌─────────────────────────────────────────────────────────────┐
 30  │                        Agent 执行                            │
 31  ├─────────────────────────────────────────────────────────────┤
 32  │                                                              │
 33  │  ┌──────────────┐                                           │
 34  │  │ before_agent │  ← Agent 开始前                            │
 35  │  └──────┬───────┘                                           │
 36  │         │                                                    │
 37  │         ▼                                                    │
 38  │  ┌────────────────────────────────────────────────────────┐ │
 39  │  │                    Agent Loop                          │ │
 40  │  │  ┌──────────────┐                                      │ │
 41  │  │  │ before_model │  ← LLM 调用前                         │ │
 42  │  │  └──────┬───────┘                                      │ │
 43  │  │         │                                              │ │
 44  │  │         ▼                                              │ │
 45  │  │  ┌──────────────┐                                      │ │
 46  │  │  │   LLM Call   │                                      │ │
 47  │  │  └──────┬───────┘                                      │ │
 48  │  │         │                                              │ │
 49  │  │         ▼                                              │ │
 50  │  │  ┌──────────────┐                                      │ │
 51  │  │  │ after_model  │  ← LLM 响应后                         │ │
 52  │  │  └──────┬───────┘                                      │ │
 53  │  │         │                                              │ │
 54  │  │         ▼                                              │ │
 55  │  │  ┌──────────────┐                                      │ │
 56  │  │  │ before_tools │  ← 工具执行前                         │ │
 57  │  │  └──────┬───────┘                                      │ │
 58  │  │         │                                              │ │
 59  │  │         ▼                                              │ │
 60  │  │  ┌──────────────┐                                      │ │
 61  │  │  │Tool Execution│                                      │ │
 62  │  │  └──────┬───────┘                                      │ │
 63  │  │         │                                              │ │
 64  │  │         ▼                                              │ │
 65  │  │  ┌──────────────┐                                      │ │
 66  │  │  │ after_tools  │  ← 工具执行后                         │ │
 67  │  │  └──────┬───────┘                                      │ │
 68  │  │         │                                              │ │
 69  │  └─────────┴──────────────────────────────────────────────┘ │
 70  │         │                                                    │
 71  │         ▼                                                    │
 72  │  ┌──────────────┐                                           │
 73  │  │ after_agent  │  ← Agent 结束后                            │
 74  │  └──────────────┘                                           │
 75  │                                                              │
 76  └─────────────────────────────────────────────────────────────┘
 77  ```
 78  
 79  ---
 80  
 81  ## 生命周期钩子
 82  
 83  ### 钩子列表
 84  
 85  | 钩子 | 触发时机 | 典型用途 |
 86  |------|----------|----------|
 87  | `before_agent` | Agent 执行开始前 | 初始化计数器、记录开始时间 |
 88  | `after_agent` | Agent 执行结束后 | 清理资源、记录结束状态 |
 89  | `before_model` | 每次 LLM 调用前 | 检查限制、修改 messages、添加上下文 |
 90  | `after_model` | LLM 返回后 | 处理响应、记录日志、触发后续动作 |
 91  | `before_tools` | 工具执行前 | 人工审批、参数验证、工具过滤 |
 92  | `after_tools` | 工具执行后 | 结果验证、失败重试、结果转换 |
 93  
 94  ### 返回值类型
 95  
 96  Middleware 钩子函数可以返回以下值:
 97  
 98  ```erlang
 99  %% 无修改,继续执行
100  ok
101  
102  %% 更新图状态
103  {update, #{key => value}}
104  
105  %% 跳转到指定节点
106  {goto, model | tools | '__end__'}
107  
108  %% 更新状态并跳转
109  {update_goto, #{key => value}, model | tools | '__end__'}
110  
111  %% 中止执行并返回错误
112  {halt, Reason}
113  
114  %% 中断等待用户确认
115  {interrupt, #{type => tool_approval, data => Data}}
116  ```
117  
118  ---
119  
120  ## 内置 Middleware
121  
122  ### 1. middleware_call_limit - 调用限制
123  
124  限制 Agent 执行过程中的各种调用次数。
125  
126  ```erlang
127  {middleware_call_limit, #{
128      max_model_calls => 20,           %% 最大模型调用次数
129      max_tool_calls => 50,            %% 最大工具调用总次数
130      max_tool_calls_per_turn => 10,   %% 每轮最大工具调用数
131      max_iterations => 15,            %% 最大迭代次数
132      on_limit_exceeded => halt        %% 超限行为: halt | warn_and_continue
133  }}
134  ```
135  
136  ### 2. middleware_summarization - 上下文摘要
137  
138  自动压缩长对话历史。
139  
140  ```erlang
141  {middleware_summarization, #{
142      window_size => 20,               %% 保留最近 N 条消息
143      max_tokens => 4000,              %% Token 上限
144      summarize => true,               %% 是否生成摘要
145      compress_threshold => 30         %% 触发压缩的消息数阈值
146  }}
147  ```
148  
149  ### 3. middleware_human_approval - 人工审批
150  
151  在工具执行前请求人工确认。
152  
153  ```erlang
154  {middleware_human_approval, #{
155      mode => all,                     %% all | selective | custom | none
156      timeout => 60000,                %% 审批超时时间(ms)
157      timeout_action => reject,        %% 超时行为: reject | approve
158      tools => [<<"dangerous_tool">>]  %% selective 模式下需审批的工具
159  }}
160  ```
161  
162  ### 4. middleware_tool_retry - 工具重试
163  
164  工具执行失败时自动重试。
165  
166  ```erlang
167  {middleware_tool_retry, #{
168      max_retries => 3,                %% 最大重试次数
169      backoff => #{
170          type => exponential,         %% 退避类型: exponential | linear | constant
171          initial_delay => 1000,       %% 初始延迟(ms)
172          max_delay => 30000,          %% 最大延迟(ms)
173          multiplier => 2              %% 指数因子
174      },
175      retryable_errors => all          %% all | [error_type]
176  }}
177  ```
178  
179  ### 5. middleware_model_retry - 模型重试
180  
181  LLM 调用失败时自动重试。
182  
183  ```erlang
184  {middleware_model_retry, #{
185      max_retries => 3,
186      backoff => #{type => exponential, initial_delay => 1000},
187      retryable_errors => [timeout, rate_limit, server_error]
188  }}
189  ```
190  
191  ### 6. middleware_model_fallback - 模型降级
192  
193  主模型失败时切换到备用模型。
194  
195  ```erlang
196  {middleware_model_fallback, #{
197      fallback_models => [
198          #{provider => openai, model => <<"gpt-3.5-turbo">>},
199          #{provider => ollama, model => <<"llama2">>}
200      ],
201      trigger_errors => [rate_limit, timeout]
202  }}
203  ```
204  
205  ### 7. middleware_pii_detection - PII 检测
206  
207  检测并处理个人身份信息。
208  
209  ```erlang
210  {middleware_pii_detection, #{
211      action => mask,                  %% mask | warn | block
212      types => [email, phone, id_card],
213      mask_char => <<"*">>
214  }}
215  ```
216  
217  ### 8. middleware_tool_selector - 工具选择器
218  
219  根据上下文动态选择可用工具。
220  
221  ```erlang
222  {middleware_tool_selector, #{
223      strategy => context_based,       %% all | context_based | whitelist
224      whitelist => [<<"search">>, <<"calculate">>],
225      max_tools => 10
226  }}
227  ```
228  
229  ### 9. middleware_todo_list - TODO 管理
230  
231  为 Agent 提供任务追踪能力。
232  
233  ```erlang
234  {middleware_todo_list, #{
235      auto_create => true,             %% 自动创建 TODO
236      max_items => 20
237  }}
238  ```
239  
240  ### 10. middleware_shell_tool - Shell 工具
241  
242  提供安全的 Shell 命令执行。
243  
244  ```erlang
245  {middleware_shell_tool, #{
246      allowed_commands => [<<"ls">>, <<"cat">>, <<"grep">>],
247      timeout => 30000,
248      sandbox => true
249  }}
250  ```
251  
252  ### 11. middleware_file_search - 文件搜索
253  
254  提供文件和代码搜索能力。
255  
256  ```erlang
257  {middleware_file_search, #{
258      root_path => <<"/project">>,
259      max_results => 100,
260      excluded_paths => [<<"node_modules">>, <<".git">>]
261  }}
262  ```
263  
264  ### 12. middleware_context_editing - 上下文编辑
265  
266  允许动态修改对话上下文。
267  
268  ```erlang
269  {middleware_context_editing, #{
270      allow_message_deletion => true,
271      allow_message_modification => false
272  }}
273  ```
274  
275  ### 13. middleware_tool_emulator - 工具模拟
276  
277  在测试环境中模拟工具响应。
278  
279  ```erlang
280  {middleware_tool_emulator, #{
281      enabled => true,
282      responses => #{
283          <<"search">> => #{result => <<"mock search result">>}
284      }
285  }}
286  ```
287  
288  ---
289  
290  ## 预设配置
291  
292  ### 使用预设
293  
294  ```erlang
295  %% 默认配置
296  Middlewares = beamai_middleware_presets:default().
297  
298  %% 最小配置
299  Middlewares = beamai_middleware_presets:minimal().
300  
301  %% 生产环境
302  Middlewares = beamai_middleware_presets:production().
303  
304  %% 开发调试
305  Middlewares = beamai_middleware_presets:development().
306  
307  %% 人工审批
308  Middlewares = beamai_middleware_presets:human_in_loop().
309  ```
310  
311  ### 预设内容对比
312  
313  | 预设 | call_limit | summarization | tool_retry | human_approval |
314  |------|------------|---------------|------------|----------------|
315  | default | ✓ | ✓ | - | - |
316  | minimal | ✓ | - | - | - |
317  | production | ✓ (严格) | ✓ | ✓ | - |
318  | development | ✓ (宽松) | ✓ (调试) | ✓ | - |
319  | human_in_loop | ✓ | ✓ | - | ✓ |
320  
321  ### 自定义预设选项
322  
323  ```erlang
324  %% 自定义 default 预设
325  Middlewares = beamai_middleware_presets:default(#{
326      call_limit => #{max_model_calls => 30},
327      summarization => #{window_size => 30}
328  }).
329  
330  %% 扩展预设
331  Middlewares = beamai_middleware_presets:default() ++ [
332      {my_custom_middleware, #{option => value}}
333  ].
334  ```
335  
336  ---
337  
338  ## 自定义 Middleware
339  
340  ### 基本结构
341  
342  ```erlang
%% @doc Skeleton middleware that implements every beamai_middleware hook.
%%
%% Each hook receives the graph state and this middleware's own state (the
%% value returned by init/1) and returns a middleware result such as `ok',
%% `{update, Map}' or `{halt, Reason}'.
-module(my_middleware).
-behaviour(beamai_middleware).

%% All callbacks are optional; export only the ones you implement.
-export([init/1, before_agent/2, after_agent/2,
         before_model/2, after_model/2,
         before_tools/2, after_tools/2]).

%% System message injected by before_model/2.
-define(CONCISE_HINT, #{role => system, content => <<"Be concise.">>}).

%% Names of the tools that before_tools/2 refuses to execute.
-define(DANGEROUS_TOOLS, [<<"dangerous_tool">>]).

%% @doc Build the middleware state from the user-supplied options map.
%% `my_option' falls back to `default_value'; `counter' always starts at 0.
init(Opts) ->
    #{
        my_option => maps:get(my_option, Opts, default_value),
        counter => 0
    }.

%% @doc Hook that runs before the agent starts. Makes no changes.
before_agent(_State, _MwState) ->
    ok.

%% @doc Hook that runs after the agent finishes. Makes no changes.
after_agent(_State, _MwState) ->
    ok.

%% @doc Hook that runs before an LLM call: make sure the system hint is present.
%%
%% This hook fires on every pass through the agent loop, so the hint is only
%% prepended when it is not already in the message list; otherwise each
%% iteration would add another copy.
before_model(State, _MwState) ->
    Messages = graph_state:get(State, messages, []),
    case lists:member(?CONCISE_HINT, Messages) of
        true -> ok;
        false -> {update, #{messages => [?CONCISE_HINT | Messages]}}
    end.

%% @doc Hook that runs after the LLM responds. Makes no changes.
after_model(_State, _MwState) ->
    ok.

%% @doc Hook that runs before tools execute: halt if a dangerous tool is pending.
before_tools(State, _MwState) ->
    PendingTools = graph_state:get(State, pending_tools, []),
    case contains_dangerous_tool(PendingTools) of
        true -> {halt, dangerous_tool_blocked};
        false -> ok
    end.

%% @doc Hook that runs after tools execute. Makes no changes.
after_tools(_State, _MwState) ->
    ok.

%% True when any pending tool is named in ?DANGEROUS_TOOLS.
%% NOTE(review): assumes each pending tool is a map with a `name' key;
%% entries of any other shape are treated as not dangerous - confirm.
contains_dangerous_tool(Tools) ->
    lists:any(
        fun(Tool) ->
            is_map(Tool) andalso
                lists:member(maps:get(name, Tool, undefined), ?DANGEROUS_TOOLS)
        end,
        Tools).
391  ```
392  
393  ### 完整示例:调用计数器
394  
395  ```erlang
%% @doc Example middleware that caps the number of model calls in one agent run.
%% The running count is kept in the graph state under `middleware_counter'.
-module(middleware_counter).
-behaviour(beamai_middleware).

-export([init/1, before_agent/2, before_model/2, after_agent/2]).

%% @doc Build the middleware state; `max_calls' defaults to 10.
init(Opts) ->
    Limit = maps:get(max_calls, Opts, 10),
    #{max_calls => Limit, current_calls => 0}.

%% @doc Agent start: reset the counter stored in the graph state.
before_agent(_State, _MwState) ->
    Reset = #{middleware_counter => 0},
    {update, Reset}.

%% @doc Before each model call: halt once the limit is reached, otherwise
%% store the incremented count.
before_model(State, #{max_calls := Limit}) ->
    Seen = graph_state:get(State, middleware_counter, 0),
    if
        Seen >= Limit ->
            logger:warning("Middleware: Call limit exceeded (~p/~p)", [Seen, Limit]),
            {halt, {call_limit_exceeded, Seen}};
        true ->
            {update, #{middleware_counter => Seen + 1}}
    end.

%% @doc Agent end: log how many model calls were counted.
after_agent(State, _MwState) ->
    Total = graph_state:get(State, middleware_counter, 0),
    logger:info("Middleware: Total model calls: ~p", [Total]),
    ok.
429  ```
430  
431  ### 完整示例:请求日志
432  
433  ```erlang
%% @doc Example middleware that logs LLM requests/responses and tool activity.
-module(middleware_logger).
-behaviour(beamai_middleware).

-export([init/1, before_model/2, after_model/2, before_tools/2, after_tools/2]).

%% @doc Build the middleware state.
%% `log_level' defaults to `info'; `include_content' defaults to `false'.
init(Opts) ->
    #{
        log_level => maps:get(log_level, Opts, info),
        include_content => maps:get(include_content, Opts, false)
    }.

%% @doc Log the outgoing request and record when the model call started.
before_model(State, #{log_level := Level, include_content := IncludeContent}) ->
    Messages = graph_state:get(State, messages, []),
    case IncludeContent of
        true ->
            log(Level, ">>> LLM Request: ~p messages~n~p", [length(Messages), Messages]);
        false ->
            log(Level, ">>> LLM Request: ~p messages", [length(Messages)])
    end,
    %% Monotonic time never jumps when the system clock is adjusted, so the
    %% difference taken in after_model/2 is a valid elapsed time.
    {update, #{mw_model_start_time => erlang:monotonic_time(millisecond)}}.

%% @doc Log how long the model call took and how large the response was.
after_model(State, #{log_level := Level}) ->
    Now = erlang:monotonic_time(millisecond),
    %% Fall back to Now so a missing start time reports 0 ms instead of a
    %% meaningless value.
    StartTime = graph_state:get(State, mw_model_start_time, Now),
    Response = graph_state:get(State, last_llm_response, #{}),
    Size = content_size(maps:get(content, Response, <<>>)),
    log(Level, "<<< LLM Response (~pms): ~p bytes", [Now - StartTime, Size]),
    ok.

%% @doc Log the names of the tools that are about to run.
before_tools(State, #{log_level := Level}) ->
    Tools = graph_state:get(State, pending_tools, []),
    ToolNames = [maps:get(name, T, unknown) || T <- Tools],
    log(Level, ">>> Tools to execute: ~p", [ToolNames]),
    ok.

%% @doc Log how many tool results were produced.
after_tools(State, #{log_level := Level}) ->
    Results = graph_state:get(State, tool_results, []),
    log(Level, "<<< Tool results: ~p items", [length(Results)]),
    ok.

%% Size in bytes of a binary response body; any other value counts as 0 so
%% that logging never crashes the hook.
%% NOTE(review): presumably content can be non-binary for responses without
%% text - confirm against the LLM client.
content_size(Content) when is_binary(Content) -> byte_size(Content);
content_size(_Other) -> 0.

%% Delegate to logger:log/3 so every logger level is accepted, not only
%% debug/info/warning/error.
log(Level, Fmt, Args) ->
    logger:log(Level, Fmt, Args).
480  ```
481  
482  ### 完整示例:敏感词过滤
483  
484  ```erlang
%% @doc Example middleware that detects blocked words in the LLM response and
%% blocks, warns about, or replaces them.
-module(middleware_content_filter).
-behaviour(beamai_middleware).

-export([init/1, after_model/2]).

%% Options shared by detection and replacement so both agree on what counts
%% as a match: case-insensitive, with subject and pattern read as UTF-8.
-define(MATCH_OPTS, [caseless, unicode]).

%% @doc Build the middleware state.
%% `blocked_words' defaults to [], `replacement' to <<"[FILTERED]">> and
%% `action' (replace | block | warn) to `replace'.
init(Opts) ->
    #{
        blocked_words => maps:get(blocked_words, Opts, []),
        replacement => maps:get(replacement, Opts, <<"[FILTERED]">>),
        action => maps:get(action, Opts, replace)
    }.

%% @doc Inspect the last LLM response and apply the configured action when a
%% blocked word is found.
%%
%% Returns `{halt, {blocked_content, Word}}' for `block', `ok' for `warn' or
%% clean content, and an `{update, ...}' carrying the filtered response for
%% `replace'.
after_model(State, #{blocked_words := BlockedWords, replacement := Replacement, action := Action}) ->
    Response = graph_state:get(State, last_llm_response, #{}),
    Content = maps:get(content, Response, <<>>),

    case check_content(Content, BlockedWords) of
        {found, Word} ->
            case Action of
                block ->
                    {halt, {blocked_content, Word}};
                warn ->
                    logger:warning("Blocked word detected: ~p", [Word]),
                    ok;
                replace ->
                    FilteredContent = filter_content(Content, BlockedWords, Replacement),
                    NewResponse = Response#{content => FilteredContent},
                    {update, #{last_llm_response => NewResponse}}
            end;
        clean ->
            ok
    end.

%% Return `{found, Word}' for the first blocked word (in list order) that
%% occurs in Content, or `clean'. Non-binary content has no text to scan.
%% Assumes binary content is valid UTF-8; re raises badarg otherwise.
check_content(Content, BlockedWords) when is_binary(Content) ->
    Misses = fun(Word) -> not contains_word(Content, Word) end,
    case lists:dropwhile(Misses, BlockedWords) of
        [] -> clean;
        [First | _] -> {found, First}
    end;
check_content(_Content, _BlockedWords) ->
    clean.

%% An empty word would match every position, so it never counts as a hit.
contains_word(_Content, <<>>) ->
    false;
contains_word(Content, Word) ->
    re:run(Content, word_regex(Word), [{capture, none} | ?MATCH_OPTS]) =:= match.

%% Replace every occurrence of every blocked word with Replacement, using the
%% same matching rules as check_content/2 so anything detected is filtered.
filter_content(Content, BlockedWords, Replacement) ->
    Literal = escape_replacement(Replacement),
    lists:foldl(
        fun(<<>>, Acc) ->
                Acc;
           (Word, Acc) ->
                re:replace(Acc, word_regex(Word), Literal,
                           [global, {return, binary} | ?MATCH_OPTS])
        end,
        Content,
        BlockedWords).

%% Backslash-escape regex metacharacters so the word is matched literally.
word_regex(Word) ->
    re:replace(Word, <<"[\\\\^$.|?*+()\\[\\]{}]">>, <<"\\\\&">>,
               [global, unicode, {return, binary}]).

%% re:replace/4 gives `&' and `\' a special meaning in the replacement text;
%% escape them so the configured replacement is inserted verbatim.
escape_replacement(Replacement) ->
    re:replace(Replacement, <<"[\\\\&]">>, <<"\\\\&">>, [global, {return, binary}]).
531  ```
532  
533  ---
534  
535  ## 配置和使用
536  
537  ### 在 Agent 配置中使用
538  
539  ```erlang
540  %% 方式 1:使用预设
541  {ok, Agent} = beamai_agent:start_link(<<"my_agent">>, #{
542      system_prompt => <<"You are helpful.">>,
543      llm => LLMConfig,
544      middlewares => beamai_middleware_presets:default()
545  }).
546  
547  %% 方式 2:手动配置
548  {ok, Agent} = beamai_agent:start_link(<<"my_agent">>, #{
549      system_prompt => <<"You are helpful.">>,
550      llm => LLMConfig,
551      middlewares => [
552          {middleware_call_limit, #{max_model_calls => 15}},
553          {middleware_summarization, #{window_size => 20}},
554          {my_custom_middleware, #{option => value}}
555      ]
556  }).
557  
558  %% 方式 3:混合配置
559  {ok, Agent} = beamai_agent:start_link(<<"my_agent">>, #{
560      middlewares => beamai_middleware_presets:production() ++ [
561          {middleware_logger, #{log_level => debug}}
562      ]
563  }).
564  ```
565  
566  ### Middleware 配置格式
567  
568  ```erlang
569  %% 完整格式:{模块, 选项, 优先级}
570  {middleware_call_limit, #{max_model_calls => 20}, 10}
571  
572  %% 省略优先级:{模块, 选项}(使用默认优先级 100)
573  {middleware_call_limit, #{max_model_calls => 20}}
574  
575  %% 仅模块名(使用默认选项和优先级)
576  middleware_call_limit
577  ```
578  
579  ### 优先级说明
580  
 581  - 数值越小,越先执行
 582  - 默认优先级为 100;未指定优先级的 Middleware 因此会排在下述推荐范围(10-90)之后执行
583  - 推荐范围:
584    - 10-30: 前置检查(限制、验证)
585    - 40-60: 核心功能(审批、重试)
586    - 70-90: 后置处理(日志、监控)
587  
588  ---
589  
590  ## 高级用法
591  
592  ### 访问图状态
593  
594  ```erlang
%% Example: read several keys from the graph state, then return an update.
%% NewMessage and NewValue are not bound in this snippet; they stand for
%% values the implementer supplies.
before_model(State, _MwState) ->
    %% Read existing values
    History = graph_state:get(State, messages, []),
    _Context = graph_state:get(State, context, #{}),

    %% Look up a custom key
    _MyData = graph_state:get(State, my_custom_key, undefined),

    %% Describe the state changes and hand them back
    Changes = #{
        messages => History ++ [NewMessage],
        my_custom_key => NewValue
    },
    {update, Changes}.
608  ```
609  
610  ### 流程控制
611  
612  ```erlang
%% Skip tool execution and jump straight back to the model node.
before_tools(State, _MwState) ->
    SkipTools = should_skip_tools(State),
    case SkipTools of
        false -> ok;
        true -> {goto, model}
    end.

%% End the agent run immediately.
after_model(State, _MwState) ->
    Finished = is_final_answer(State),
    case Finished of
        false -> ok;
        true -> {goto, '__end__'}
    end.
626  ```
627  
628  ### 中断和恢复
629  
630  ```erlang
%% Ask for human confirmation before the pending tools are executed.
before_tools(State, _MwState) ->
    Pending = graph_state:get(State, pending_tools, []),
    ApprovalRequest = #{
        type => tool_approval,
        data => #{tools => Pending},
        timeout => 60000
    },
    case needs_approval(Pending) of
        false -> ok;
        true -> {interrupt, ApprovalRequest}
    end.
644  ```
645  
646  ### Middleware 间通信
647  
648  ```erlang
%% Middlewares share data with each other through the graph state.
before_model(_State, _MwState) ->
    %% Publish data for other middlewares to pick up
    Published = #{timestamp => erlang:system_time()},
    {update, #{shared_data => Published}}.

after_model(State, _MwState) ->
    %% Read the data published by another middleware
    SharedData = graph_state:get(State, shared_data, #{}),
    %% ... use SharedData here ...
    ok.
659  ```
660  
661  ---
662  
663  ## API 参考
664  
665  ### beamai_middleware 行为
666  
667  ```erlang
668  %% 所有回调都是可选的
669  -callback init(Opts :: map()) -> middleware_state().
670  -callback before_agent(State, MwState) -> middleware_result().
671  -callback after_agent(State, MwState) -> middleware_result().
672  -callback before_model(State, MwState) -> middleware_result().
673  -callback after_model(State, MwState) -> middleware_result().
674  -callback before_tools(State, MwState) -> middleware_result().
675  -callback after_tools(State, MwState) -> middleware_result().
676  ```
677  
678  ### beamai_middleware_runner
679  
680  ```erlang
681  %% 初始化 Middleware 链
682  -spec init([middleware_spec()]) -> middleware_chain().
683  
684  %% 执行钩子
685  -spec run_hook(hook_name(), graph_state(), middleware_chain()) -> run_result().
686  
687  %% 获取/设置 Middleware 状态
688  -spec get_middleware_state(module(), middleware_chain()) -> {ok, state()} | {error, not_found}.
689  -spec set_middleware_state(module(), state(), middleware_chain()) -> middleware_chain().
690  ```
691  
692  ### beamai_middleware_presets
693  
694  ```erlang
695  %% 预设配置
696  -spec default() -> [middleware_spec()].
697  -spec default(map()) -> [middleware_spec()].
698  -spec minimal() -> [middleware_spec()].
699  -spec production() -> [middleware_spec()].
700  -spec development() -> [middleware_spec()].
701  -spec human_in_loop() -> [middleware_spec()].
702  
703  %% 单独 Middleware 配置
704  -spec call_limit() -> middleware_spec().
705  -spec call_limit(map()) -> middleware_spec().
706  -spec summarization() -> middleware_spec().
707  -spec human_approval() -> middleware_spec().
708  -spec tool_retry() -> middleware_spec().
709  ```
710  
711  ---
712  
713  ## 更多资源
714  
715  - [beamai_agent README](../apps/beamai_agent/README.md)
716  - [API 参考](API_REFERENCE.md)
717  - [架构设计](ARCHITECTURE.md)