/ core / attention / __pycache__ / transcript_miner.cpython-314.pyc
transcript_miner.cpython-314.pyc
  1  +
  2  A:gia>��
  3  �Rt^RIHtHt^RIHtHt^RIHt^RIH	t	H
  4  t
  5  HtHtH
t
Ht^RIHt^RIt^RIt^RIHtHtHt]!RR	44t]!R
  6  R44t]!RR
44t!RR4tRRlt]R8Xd^RIt]!R4Rt]!]4t ^�t!]"!]PF4^8�d]$!]PF^,4t!]!R]!R24] PK]!R7t&]!]&PO44R#R#)a
  7  Transcript Miner - Extract insights from historical conversations.
  8  
  9  Your 7 days of transcripts are rich with:
 10  1. Signal words showing what you value
 11  2. Decision patterns
 12  3. Topic trajectories
 13  4. Validation-seeking moments (potential insights to mine)
 14  5. Language patterns for personalization
 15  
 16  This module:
 17  - Parses session transcripts
 18  - Extracts and categorizes signal words
 19  - Identifies unmined insights (high-weight items not yet integrated)
 20  - Builds topic maps showing conceptual relationships
 21  - Learns your personal vocabulary/patterns
 22  )�	dataclass�field)�datetime�	timedelta)�Path)�Optional�List�Dict�Tuple�Set�Any)�defaultdictN)�SignalWordDetector�SignalDetection�detect_signalsc�ba�]tRt^toRtRtRtRtRt]	!]
 23  R7tRtRt
RtRtV3RltRtVtR#)	�TranscriptAtomz(A single message/atom from a transcript.N�g�?��default_factoryFc��<�V^8�dQh/S[;R&S[;R&S[S[,;R&S[;R&S[S[,;R&S[;R&S[S[,;R&S[;R&S[;R	&S[;R
 24  &S[;R&#)��content�speaker�	timestamp�
 25  session_id�signals�weight�tags�
 26  is_insight�is_decision�is_principle�needs_integration)�strrrr�floatr�bool)�format�
__classdict__s"��B/Users/rcerf/repos/Sovereign_OS/core/attention/transcript_miner.py�__annotate__�TranscriptAtom.__annotate__s������L���L�	�
 27  ��!�(����
��o�
&�-��
����c�(�/�������� ��!�"�#�#��)�__name__�
 28  __module__�__qualname__�__firstlineno__�__doc__rrrrr�setrrr r!r"�__annotate_func__�__static_attributes__�__classdictcell__�r's@r(rrsH����2�%)�I��J�*.�G��F��3�/�D��J��K��L�#��#�r+rc�da�]tRt^2toRt]!]R7tRt^t	^t
 29  ^t]!]R7t
V3RltRtVtR#)�TopicClusterz,A cluster of related content around a topic.r�c�<�V^8�dQh/S[;R&S[S[,;R&S[;R&S[;R&S[;R&S[;R&S[S[S[3,;R&#)r�topic�atoms�total_weight�decision_count�
insight_count�principle_count�related_topics)r#rrr$�intr	)r&r's"�r(r)�TopicCluster.__annotate__2sl�����J�����=�	�
 30  �����
���������c��N�@�r+r,N)r-r.r/r0r1r�listr<r=r>r?r@�dictrAr3r4r5r6s@r(r8r82s9����6�"'��"=�E��L��N��M��O�&+�4�%@�N��r+r8c�a�]tRt^@toRt^t^tRt]!]	R7t
 31  ]!]	R7t]!]	R7t]!]
R7t]!]
R7t]!]	R7tRtV3RlRltV3RltRtVtR#)	�MiningReportz Results from mining transcripts.r9rc� <�V^8�dQhRS[/#)r�return�r#)r&r's"�r(r)�MiningReport.__annotate__Ts���8 �8 �S�8 r+c
 32  �b�RRVPRVPRVPR
R2R.pVP'd�VP	R4VP	R	4VP	R4VPR
 33  ,FipVP
 34  'dRP
VP
 35  4MRpVP	R
VPR
RVPR,RVR24Kk	VP	R4VP'dcVP	R4VPR
 36  ,F)pVP	RVPR,R24K+	VP	R4VP'dcVP	R4VPR
 37  ,F)pVP	RVPR,R24K+	VP	R4VP'EdVP	R4\VPP4RRR7pVR
 38  ,F�pVP	RVP24VP	RVPR
RVP RVP"24VP$'dX\VP$P'4R R!7R",pVP	R#RP
R$V4424VP	R4K�	VP('dvVP	R%4VP(R&,FwrxVP	RVR'VR(24K	VP	R4VP	R)VP*R*
24R+P
V4#),zGenerate markdown report.z# Transcript Mining Report�*z sessions, z atoms, z.1fz hours*rz## Unmined Insightsz'*High-weight items not yet integrated:*:N�
 39  Nz, �nonez- [z.2fz] :N�dNz... (tags: �)z## Principles Identifiedz- :N�Nz...z## Decisions Madez## Topic Clustersc��VP#�N)r=)�cs&r(�<lambda>�*MiningReport.to_markdown.<locals>.<lambda>ys��a�n�nr+T��key�reversez### zWeight: z, Insights: z
, Decisions: c��V^,)#��r,��xs&r(rVrW�s��TU�VW�TX�SXr+)rY:N�Nz	Related: c3�*"�TF	wrVx�K	R#5irTr,)�.0�trUs&  r(�	<genexpr>�+MiningReport.to_markdown.<locals>.<genexpr>�s���6M�W�T�Q�q�W�s�z## Your Signal Word Patterns:N�Nz: z timeszValidation-seeking rate: z.1%�
 40  )�sessions_analyzed�atoms_processed�time_window_hours�unmined_insights�appendr�joinrr�
 41  principles�	decisions�topic_clusters�sorted�valuesr;r=r?r>rA�items�your_common_signals�validation_seeking_rate)	�self�lines�atomr�sorted_clusters�cluster�related�signal�counts	&        r(�to_markdown�MiningReport.to_markdownTs7��
)���&�&�'�{�4�3G�3G�2H��QU�Qg�Qg�hk�Pl�ls�t��
 42  ��� � � ��L�L�.�/��L�L�B�C��L�L����-�-�c�2�2��/3�y�y�y�t�y�y����+�f�����s�4�;�;�s�"3�2�d�l�l�4�6H�5I��UY�TZ�Z[�\�]�3�
�L�L����?�?�?��L�L�3�4�����,�,�����r�$�,�,�t�"4�!5�S�9�:�-��L�L����>�>�>��L�L�,�-����s�+�+�����r�$�,�,�t�"4�!5�S�9�:�,��L�L��������L�L�,�-�$��#�#�*�*�,�,���O�
 43  +�3�/�/�����t�G�M�M�?�3�4����x��(<�(<�S�'A��g�Nc�Nc�Md�dq�ry�sI�sI�rJ�K�L��)�)�)�$�W�%;�%;�%A�%A�%C��Y�Z\�]�G��L�L�9�T�Y�Y�6M�W�6M�-M�,N�!O�P����R� �
0��#�#�#��L�L�7�8�!%�!9�!9�#�!>�!>�
�����r�&���E�7�&�9�:�"?��L�L����L�L�4�T�5Q�5Q�RU�4V�W�X��y�y���r+c�<�V^8�dQh/S[;R&S[;R&S[;R&S[S[,;R&S[S[,;R&S[S[,;R&S[S[S[3,;R&S[S[S[3,;R&S[S[S[S[3,,;R	&S[;R
 44  &#)rrhrirjrkrnro�signal_word_countsrprtru)rBr$rrr	r#r8r
 45  )r&r's"�r(r)rK@s����������	�
 46  �"���>�*�H���^�$�B���N�#�A���S�#�X��D����l�*�+�I��"�e�C��H�o�.�L�#�$#�(�%r+r,N)r-r.r/r0r1rhrirjrrDrkrnrorEr�rprtrur~r3r4r5r6s@r(rGrG@su����*����O�"��.3�4�-H��',�T�'B�J�&+�D�&A�I�*/�t�)D��.3�D�.I�N�27�t�1L��%(��8 �8 �)�r+rGc��a�]tRt^�toRtRR3.t.ROtV3RlRltRV3RlRlltV3R	lR
 47  lt	V3RlRlt
 48  V3R
lRltV3RlRltV3RlRlt
V3RlRltV3RlRltRtVtR#)�TranscriptMinerap
 49  Mines session transcripts for insights, patterns, and unmined content.
 50  
 51  Usage:
 52      miner = TranscriptMiner(sessions_dir)
 53      report = miner.mine(hours_back=168)  # 7 days
 54  
 55      # Get unmined insights
 56      for insight in report.unmined_insights:
 57          print(f"[{insight.weight}] {insight.content}")
 58  
 59      # Export topic map
 60      miner.export_topic_map("topic_map.json")
 61  zT\*\*\[(\d{2}:\d{2}:\d{2})\]\s+(Rick|Claude)\*\*\s*\n\s*-\s+(.+?)(?=\n\n|\n\*\*\[|\Z)c�f�VP^4VP^4VP^43#)r)�group)�ms&r(rV�TranscriptMiner.<lambda>�s#��A�G�G�A�J�����
 62  �A�G�G�A�J�7r+c� <�V^8�dQhRS[/#)r�sessions_dirrJ)r&r's"�r(r)�TranscriptMiner.__annotate__�s���	?�	?�S�	?r+c��\V4Vn\4Vn.Vn\R4Vn\\4VnR#)c��\RR7#)r�r;)r8r,r+r(rV�*TranscriptMiner.__init__.<locals>.<lambda>�s	��L�r�*r+N)	rr�r�signal_detector�_atomsr
�_topic_clustersrB�_signal_counts)rvr�s&&r(�__init__�TranscriptMiner.__init__�sB�� ��.���1�3���-/���8C�*�9
 63  ���/:�#�.>��r+Nc�2<�V^8�dQhRS[RS[RS[RS[/#)r�
 64  hours_back�speaker_filter�
 65  min_weightrI)r$r#rG)r&r's"�r(r)r��s9���-A�-A��-A��-A��	-A�
 66  
 67  �-Ar+c�l�\P!4\VR7,
 68  p\VPPR44p^pVF�p\P!VP4P4pW�8dK9V^,
pVPp	VP4p
 69  VPW�4pVF]pV'dVPV8wdKVPV8dK1VPPV4VP!V4K_	K�	VP#Wa4#)a
 70  Mine transcripts from the specified time window.
 71  
 72  Args:
 73      hours_back: How far back to look
 74      speaker_filter: Only analyze this speaker's messages
 75      min_weight: Only include atoms above this weight
 76  
 77  Returns:
 78      MiningReport with insights, patterns, and topic clusters
 79  )�hoursz	*-live.md)r�nowrrDr��glob�
fromtimestamp�stat�st_mtime�stem�	read_text�_extract_atomsrrr�rl�_process_atom_for_clusters�
_build_report)
rvr�r�r��cutoff�
session_filesrh�session_file�mtimerrr<rxs
&&&&         r(�mine�TranscriptMiner.mine�s��"����)�*�"=�=���T�.�.�3�3�K�@�A�
���)�L��*�*�<�+<�+<�+>�+G�+G�H�E��~����"��%�*�*�J�#�,�,�.�G��'�'��<�E���!�d�l�l�n�&D���;�;��+�����"�"�4�(��/�/��5��*�,�!�!�"3�@�@r+c�<<�V^8�dQhRS[RS[RS[S[,/#)rrrrI�r#rr)r&r's"�r(r)r��s'���1�1�c�1�s�1�t�N�?S�1r+c�Z�.pVPR4pRp.pRpVEF>p\P!RV4p	V	'dsV'dEV'd=VPRP	V4VVV4p
 80  V
 81  'dVPV
 82  4V	P
^4pV	P
^4p.pK�V'gK�VP4pVPR4'gK�VPR4'dK�\P!RRV4pVPR4'dEKVPR	4'dEK-VPV4EKA	V'dEV'd=VPRP	V4VVV4p
 83  V
 84  'dVPV
 85  4V#)
 86  z,Extract atoms from session markdown content.rgNz7^\s*-\s*\*\*\[(\d{2}:\d{2}:\d{2})\]\s+(Rick|Claude)\*\*�-z- {z^-\s*rz<!--z{'type')
 87  �split�re�match�_create_atomrmrlr��strip�
 88  startswith�sub)
rvrrr<rw�current_speaker�current_content�current_timestamp�line�
speaker_matchrx�stripped�texts
&&&          r(r��TranscriptMiner._extract_atoms�sW�����
�
�d�#������ ���D��H�H�%_�ae�f�M��"���,�,��	�	�/�2�'�)�"�	�D�����T�*�$1�$7�$7��$:�!�"/�"5�"5�a�"8��"$�� ���:�:�<���&�&�s�+�+�H�4G�4G��4N�4N��6�6�(�B��9�D��?�?�6�2�2�4�?�?�;�;W�;W�'�.�.�t�4�5�:���$�$��	�	�/�*��!��	�D�����T�"��r+c�H<�V^8�dQhRS[RS[RS[RS[RS[S[,/#)rrr�
timestamp_strrrI)r#rr)r&r's"�r(r)r�sB���)�)��)��)��	)�
 89  �)�
 90  �.�	!�
)r+c	���V'd\V4^
 91  8dR#\P!RRV4p\P!RRV\PR7pVP	4pV'gR#VP
 92  P
V4pVPF.wrgVPVP;;,^,
uu&K0	\VVVVVPVPP4R7pVPR8�;'dRVP9VnVPVnR	VP9VnVPR
 93  8�;'dVR8HVnV#)z-Create a TranscriptAtom with signal analysis.Nz\^[a-f0-9]+rz
 94  <!--.*?-->)�flags)rrrrrrg�������?�needs_validation�	principle�333333�?�Rick)�lenr�r��DOTALLr�r��detect�
signals_foundr��patternr�weight_modifier�suggested_tags�copyrr r!r")	rvrrr�rrr|r�rxs	&&&&&    r(r��TranscriptMiner._create_atoms:���#�g�,��+���&�&���W�5���&�&���G�2�9�9�E���-�-�/�����&�&�-�-�g�6��%�2�2�M�F�������/�1�4�/�3����!���*�*��'�'�,�,�.�

 95  ��"�1�1�C�7�h�h�<N�RY�Rh�Rh�<h���"�.�.���'�7�+A�+A�A���!(�!8�!8�3�!>�!T�!T�7�f�CT����r+c�$<�V^8�dQhRS[RR/#)rrxrIN)r)r&r's"�r(r)r�Es���i�i�~�i�$�ir+c���VPVP4pVEF@pW0P9d\VR7VPV&VPV,pVPPV4V;PVP,
unVP'dV;P^,
un	VP'dV;P^,
unVP'dV;P^,
un
VF;pWS8wgKVPPV^4^,VPV&K=	EKC	R#)z$Add atom to relevant topic clusters.r�N)�_extract_topicsrr�r8r<rlr=rr r>rr?r!r@rA�get)rvrx�topicsr;rz�other_topics&&    r(r��*TranscriptMiner._process_atom_for_clustersEs	���%�%�d�l�l�3���E��0�0�0�.:��.G��$�$�U�+��*�*�5�1�G��M�M� � ��&�� � �D�K�K�/� ������&�&�!�+�&������%�%��*�%�� � � ��'�'�1�,�'� &���'�:A�:P�:P�:T�:T�U`�bc�:d�gh�:h�G�*�*�;�7� &�!r+c�6<�V^8�dQhRS[RS[S[,/#)rrrI)r#r)r&r's"�r(r)r�]s���
�
�s�
�s�3�x�
r+c��\4pVP4pVPF*p\P!WC4pVPV4K,	0RmpVUu0uFqwV9gK\
V4^8�gKVkK!	ppV#uupi)zExtract topics from content.>	�and�for�the�been�from�have�that�this�with)r2�lower�TOPIC_PATTERNSr��findall�updater�)rvrr��
content_lowerr��matches�
 96  stop_wordsrcs&&      r(r��TranscriptMiner._extract_topics]sx������
�
��
��*�*�G��j�j��8�G��M�M�'�"�+�
 97  [�
 98  �#�J�V��
 99  �':�!�s�1�v��z�!�!�V��J��
��Ks�B�*
B�<Bc�,<�V^8�dQhRS[RS[RS[/#)rrhr�rI)rBr$rG)r&r's"�r(r)r�ls"���*�*�s�*��*�,�*r+c�X�\V\VP4VR7pVPF�pVP'd-VPR8�dVP
100  P
V4VP'dVPP
V4VP'gK�VPP
V4K�	VP
101  PRRR7VPPRRR7VPPRRR7\VP4Vn\VP4Vn\#VPP%4RRR7VnVPUu.uFqUP(R	8XgKVNK	ppVUu.uF6pVP*'gKR
102  VP*P,9gK4VNK8	ppV'd!\V4\V4,VnV#uupiuupi)zBuild the mining report.)rhrirjr�c��VP#rT�r��as&r(rV�/TranscriptMiner._build_report.<locals>.<lambda>~s��1�8�8r+TrXc��VP#rTr�r�s&r(rVr�s��Q�X�Xr+c��VP#rTr�r�s&r(rVr��s��A�H�Hr+c��V^,#r\r,r^s&r(rVr��s��!�A�$r+r�r�)rGr�r�r"rrkrlr!rnr ro�sortrEr�rpr�r�rqrsrtrrr�ru)rvrhr��reportrxr��
103  rick_atoms�validation_seekings&&&     r(r��TranscriptMiner._build_reportls����/�����,�(�
104  ���K�K�D��%�%�%�$�+�+��*;��'�'�.�.�t�4�� � � ��!�!�(�(��.������ � �'�'��-�
 �	���$�$�);�T�$�J������#5�t��D������"4�d��C�!%�T�%9�%9� :���%)��)<�)<�$=��!�%+����%�%�'���&
105  ��"�"&���D��A�	�	�V�0C�a�a��
106  �D�!�
107  �!�!��y�y�
�/�1�9�9�3K�3K�K�
�A�z�	�
108  ��-0�1C�-D�s�:��-V�F�*��
��E��
109  s�H"�.H"�:H'�H'�0H'c�$<�V^8�dQhRS[RR/#)r�output_pathrINrJ)r&r's"�r(r)r��s���E�E�C�E�D�Er+c
���.p.pVPP4F�wrEVPRVRVPRVPRVP
110  RVP/4VPP4F%wrgV^8�gKVPRVRVRV/4K'	K�	\VR4;_uu_4p\P!R	VR
111  V/V^R7RRR4R# +'giR#;i)
z+Export topic map as JSON for visualization.�idr�insightsrorn�source�target�w�nodes�edges)�indentN)r�rsrlr=r?r>r@rA�open�json�dump)	rvr�rrr;rzr{r}�fs	&&       r(�export_topic_map� TranscriptMiner.export_topic_map�s�������"�2�2�8�8�:�N�E��L�L��e��'�.�.��G�1�1��W�3�3��g�5�5��
�#*�"8�"8�">�">�"@����1�9��L�L� �%� �'� �%�"��#A�;�"�+�s�
#�
#�q��I�I�w��w��6��!�D�$�
#�
#�
#�s�C*�*C;	c�6<�V^8�dQhRS[RS[S[,/#)rr;rIr�)r&r's"�r(r)r��s ��������n�1E�r+c�`�WP9dVPV,P#.#)z!Get all atoms related to a topic.)r�r<)rvr;s&&r(�get_atoms_for_topic�#TranscriptMiner.get_atoms_for_topic�s*���(�(�(��'�'��.�4�4�4��	r+)r�r�r�r�r�)z#(\w+)z\b([a-z]+_[a-z_]+)\bz=\b(attention|context|stream|session|coherence|membrane|aha)\b)�Nr9)r-r.r/r0r1�
ATOM_PATTERNSr�r�r�r�r�r�r�r�rrr4r5r6s@r(r�r��s�����
�$
112  a�	7�	9��M��N�	?�	?�-A�-A�^1�1�f)�)�Vi�i�0
�
�*�*�XE�E�2�r+r�c�0�V^8�dQhR\R\/#)rr�rI)r#r�)r&s"r(r)r)�s��)�)�#�)�/�)r+c��\V4#)zCreate a transcript miner.)r�)r�s&r(�create_transcript_minerr�s
���<�(�(r+�__main__z=== Transcript Miner ===
113  z3/Users/rcerf/repos/Sovereign_Estate/daily/sessions/zMining last z	 hours...)r�)(r1�dataclassesrrrr�pathlibr�typingrrr	r
114  rr�collectionsr
r�r�signal_wordsrrrrr8rGr�rr-�sys�printr��minerr�r��argvr$r�r�r~r,r+r(�<module>rs���$)�(��8�8�#�	��M�M��$�$��$�&�
115  A�
116  A��
117  A��K �K ��K �\f�f�R	)�
118  �z���	�
119  &�'�H�L��L�)�E��E�
120  �3�8�8�}�q���c�h�h�q�k�"��	�L���y�
121  )�*�
�Z�Z�5�Z�
)�F�	�&�
122  �
123  �
124  ��r+