/ app / __pycache__ / client.cpython-312.pyc
client.cpython-312.pyc
  1  2  #��i�,��<�ddlmZddlZddlZddlZddlZddlmZddlm	Z	m
  3  Z
  4  mZddlZddl
mZddlmZmZddlmZmZddlmZdd	lmZmZmZej6e�Ze
  5  geeeeffZ Gd
  6  �de!�Z"ed�
�Gd�d��Z#Gd�d�Z$y)�)�annotationsN)�	dataclass)�Any�Callable�Iterable)�
MessageToDict)�vllm_engine_pb2�vllm_engine_pb2_grpc)�
AutoTokenizer�PreTrainedTokenizerBase)�discover_surface)�
FinalDecision�IncidentRequest�TriageUpdatec��eZdZdZy)�
VllmGrpcErrorz2Raised when a transport-level gRPC failure occurs.N)�__name__�
  7  __module__�__qualname__�__doc__���
app/client.pyrrs��<rrT)�slotsc�r�eZdZUded<ded<ded<ded<ded<d	ed
  8  <ded<ded
<ded<ded<y)�GenerationResult�str�
  9  request_id�selected_rpc�
 10  raw_output�list[TriageUpdate]�updates�int�chunk_count�float | None�time_to_first_update_ms�float�end_to_end_latency_ms�output_bytes_receivedz
 11  str | None�
finish_reason�dict[str, Any]�grpc_surfaceN)rrr�__annotations__rrrrrs<���O����O�
����)�)� � ����� � rrc���eZdZ	ddddd�									dd�Zdd�Zdd�Zdd�Zdd�Zdd	�Zdd
 12  �Z	dd�Z
 13  edd��Zedd
��Z
e												dd��Zdd�Zdddddd�													dd�Zy)�VllmGrpcClientg^@N)�timeout�metadata_factory�surfacec���||_||_||_|xst|��|_tj|gd���|_tj|j�|_
 14  d|_d|_y)N)�endpoint))zgrpc.max_send_message_length�����)zgrpc.max_receive_message_lengthr5)zgrpc.keepalive_time_msi N)zgrpc.keepalive_timeout_msi')�options)
r4r0r1r
r2�grpc�insecure_channel�_channelr
 15  �VllmEngineStub�_stub�
 16  _tokenizer�_model_info)�selfr4r0r1r2s     r�__init__zVllmGrpcClient.__init__*sm��!��
���� 0����E�"2�H�"E����-�-���
 17  ��
�*�8�8����G��
 18  �:>���26��rc�8�|jj�y�N)r9�close�r>s rrBzVllmGrpcClient.closeCs���
�
���rc�N�|j�yt|j��SrA)r1�listrCs r�	_metadatazVllmGrpcClient._metadataFs%��� � �(���D�)�)�+�,�,rc���|j�r|j�jnd}|j�xsd}tj	d|||�t|�d|�d|���S)N�UNKNOWNz
 19  no detailsz!gRPC transport error on %s: %s %sz
 failed with z: )�code�name�details�logger�errorr)r>�exc�rpc_namerIrKs     r�
 20  _rpc_errorzVllmGrpcClient._rpc_errorKsZ��"%�(�(�*�s�x�x�z���)���+�+�-�/�<�����8�(�D�'�R���z��t�f�B�w�i�H�I�Irc�
 21  �	|jjtj�|j|j���}t|d��S#tj$r}|j|d�|�d}~wwxYw)N�r0�metadata�HealthCheckT��preserving_proto_field_name)
 22  r;rTr	�HealthCheckRequestr0rFr7�RpcErrorrPr�r>�responserNs   r�health_checkzVllmGrpcClient.health_checkQsu��	?��z�z�-�-��2�2�4�������)�.��H��X�4�H�H���}�}�	?��/�/�#�}�5�3�>��	?���AA�B�*A=�=Bc�\�|j�|jS	|jjtj�|j
 23  |j
���}t|d��|_|jS#tj$r}|j|d�|�d}~wwxYw)NrR�GetModelInfoTrU)r=r;r^r	�GetModelInfoRequestr0rFr7rXrPrrYs   r�get_model_infozVllmGrpcClient.get_model_info\s������'��#�#�#�	@��z�z�.�.��3�3�5�������)�/��H�)��t�T���������}�}�	@��/�/�#�~�6�C�?��	@�s�AB�B+�B&�&B+c�
 24  �	|jjtj�|j|j���}t|d��S#tj$r}|j|d�|�d}~wwxYw)NrR�
GetServerInfoTrU)
 25  r;rbr	�GetServerInfoRequestr0rFr7rXrPrrYs   r�get_server_infozVllmGrpcClient.get_server_infojsx��	A��z�z�/�/��4�4�6�������)�0��H��X�4�H�H���}�}�	A��/�/�#��7�S�@��	A�r\c���|j�|jS|j�}|d}tjd|�t	j
 26  |d��|_|jS)N�
 27  model_pathz/Loading tokenizer from discovered model path %sT)�trust_remote_code)r<r`rL�infor�from_pretrained)r>�
 28  model_inforfs   r�_load_tokenizerzVllmGrpcClient._load_tokenizerus\���?�?�&��?�?�"��(�(�*�
 29  ���-�
 30  ����E�z�R�'�7�7�
 31  �VZ�[������rc�H��tj�}d�fd���|�S)Nc����t|t�r.|j�D��cic]\}}|dvr	|�|���c}}St|t�r|D�cgc]
 32  }�|���c}S|Scc}}wcc}w)N>�title�default�examples)�
 33  isinstance�dict�itemsrE)�node�key�value�_prunes   �rrwz1VllmGrpcClient._generation_schema.<locals>._prune�sv����$��%�'+�j�j�l��"��U��"B�B�����&���
 34  �$��%�37�8�%��u�
�8�8��K����9s�A(�A.)rtr�returnr)r�model_json_schema)�
 35  raw_schemarws @r�_generation_schemaz!VllmGrpcClient._generation_schema~s#���"�4�4�6�
 36  �		��j�!�!rc�n�tj|jd��d��}|rdnd}d|�d|�d	�S)
 37  N�json)�mode�)�indentz0Return exactly one JSON object and nothing else.zvStream concise updates, then emit the final JSON object between BEGIN_FINAL_DECISION_JSON and END_FINAL_DECISION_JSON.z�You are the enterprise Incident Commander for a live operations bridge.
 38  Use concise executive-safe language.
 39  Do not reveal chain-of-thought. Do not emit markdown or code fences.
 40  ae
 41  Populate fields in this order: incident_id, executive_summary, severity, suspected_root_cause, impacted_assets, confidence, recommended_actions, escalation_team, change_risk, machine_json_valid.
 42  Recommended actions must only use allowed_actions as the basis for actions.
 43  Set machine_json_valid to true if your final JSON object is valid.
 44  Incident payload:
 45  �
 46  )r}�dumps�
 47  model_dump)�incident�
strict_schema�
incident_blob�contracts    r�build_promptzVllmGrpcClient.build_prompt�sZ���
 48 49  �8�#6�#6�F�#6�#C�A�N�
��
?�I�		�
U��j�"��o�R�
 50  
!�	
 51  rc
 52  ��gd�}g}|D]C\}}||vs�||vs�|j|�|jt||d|�d�||����E|S)N))�executive_summary�situation_assessment)�suspected_root_cause�probable_root_cause)�recommended_actionsr�)�machine_json_valid�final_decision_jsonzReached z" section in streamed JSON payload.��incident_id�stage�text�
 53  elapsed_ms�bytes_received)�add�appendr)	r��accumulated_textr�r��emitted_stages�
stage_markersr"�markerr�s	         r�_stage_updateszVllmGrpcClient._stage_updates�su��
 54  �
�')��*�	�M�F�E��)�)�e�>�.I��"�"�5�)���� �$/�#�'��w�.P�Q�#-�'5���	��rc�b�|jdD]}|dD]}|ddk(s�|dccS�y)N�services�methodsrJ�Generate�	full_namez%/vllm.grpc.engine.VllmEngine/Generate)r2)r>�service�methods   r�_selected_generate_rpcz%VllmGrpcClient._selected_generate_rpc�sJ���|�|�J�/�	/�G�!�)�,�
/���&�>�Z�/�!�+�.�.�
/�	/�7rTi�)�stream�
 55  max_tokensr0�on_text�	on_updatec��|j�}|j�dtj�jdd��}|j�}	|j
|d��}
 56  tj||
 57  |tjdd|tj|j�d���	��
 58  �}g}t|jdd|	��dd
��g}
|�||
d
�t�}d
}d
}d}tj �}d}	|j"j%||xs|j&|j)���}|D�]}||j+�z
}tj �|z
 59  dz}|j-d�r�|dz
}|�|}|j/|j0j2dd��}|j5|�|�||�dj7|�}|j9|j||||��}|
j;|�|���|D]
 60  }||����|j-d�s��|j<j>xsd}��	tG||	dj7|�|
||tj �|z
 61  dz|||jH��
 62  S#t@jB$r}|jE|d�|�d}~wwxYw)N�-�T)r�gg�?)�,�:)�
 63  separators)�temperature�top_pr��json_schema)rr�r��sampling_params�	transportzOpened gRPC stream via rr�rRi��chunk�F)�skip_special_tokens�clean_up_tokenization_spaces�)r�r�r�r�r��completer�)
 64  rrr r"r$r&r(r)r*r,)%rkr��uuid�uuid4�hexr�r�r	�GenerateRequest�SamplingParamsr}r�r{r�set�time�perf_counterr;r�r0rF�ByteSize�HasField�decoder��	token_idsr��joinr��extendr�r*r7rXrPrr2)r>r�r�r�r0r�r��	tokenizerrr�prompt�request�	raw_partsr"r�r�r$r*�
 65  started_at�first_update_ms�	responsesrZr��decoded�accumulated�new_updates�updaterNs                            r�generate_incidentz VllmGrpcClient.generate_incident�s����(�(�*�	� �,�,�-�Q�t�z�z�|�/?�/?���/C�.D�E�
 66  ��2�2�4���"�"�8�4�"�@��!�1�1�!���+�:�:���%� �J�J�t�'>�'>�'@�Z�X�	�	
 67  
 68  �� "�	��$�0�0�!�.�|�n�=�� �
�
 69  ��� ��g�a�j�!�#&�5������$(�
��&�&�(�
 70  �(,��$	<��
 71 72  �+�+���/�4�<�<����)�,��I�
 73  &�
L���(�"3�"3�"5�5��"�/�/�1�J�>�$�F�
 74  ��$�$�W�-��1�$�K�&�.�*4��'�.�.� ���0�0�,1�5:�/��G�
 75  �$�$�W�-��*���(�"$�'�'�)�"4�K�"&�"5�"5�$,�$8�$8�)4�#-�'5�'5�#6�#�K��N�N�;�/� �,�&1�.�F�%�f�-�.��&�&�z�2�$,�$5�$5�$C�$C�$K�t�M�9
L�@ �!�%��w�w�y�)��#�$3�#'�#4�#4�#6��#C�t�"K�"0�'����
 76 77  ���}�}�	<��/�/�#�z�2��;��	<�s%�DJ �!J �<J � K�3K�K)zlocalhost:8000)
 78  r4rr0r'r1zMetadataFactory | Noner2zdict[str, Any] | Nonerx�None)rxr�)rxzlist[tuple[str, str]] | None)rNz
grpc.RpcErrorrOrrxr)rxr+)rxr)r�rr��boolrxr)r�rr�rr�r'r�r#r�zset[str]rxr!)rxr)r�rr�r�r�r#r0r%r�zCallable[[str], None] | Noner�z%Callable[[TriageUpdate], None] | Nonerxr)rrrr?rBrFrPr[r`rdrk�staticmethodr{r�r�r�r�rrrr/r/)sM��)�7��37�)-�
7��7��	7�
 79  1�7�'�
7�
 80  �7�2�-�
 81  J�	I� �	I���"��"� �
 82  ��
 83  �0���������	�
 84  !��
 85  �
���87��� $�04�;?�^
 86  �!�^
 87  ��	^
 88 89  �^
 90  ��
^
 91  �.�^
 92  �9�^
 93  �
 94  �^
 95  rr/)%�
 96  __future__rr}�loggingr�r��dataclassesr�typingrrrr7�google.protobuf.json_formatr�smg_grpc_proto.generatedr	r
 97  �transformersrr�app.discover_vllm_grpcr
�app.schemasrrr�	getLoggerrrL�tupler�MetadataFactory�RuntimeErrorrrr/rrr�<module>r�s���"�����!�*�*��5�J�?�3�D�D�	��	�	�8�	$���2�x��c�3�h��8�8�9��=�L�=����
 98  !�
 99  !��
100  !�A
101  �A
102  r