-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathzoom-chat-parser-3
More file actions
executable file
·179 lines (155 loc) · 6.67 KB
/
zoom-chat-parser-3
File metadata and controls
executable file
·179 lines (155 loc) · 6.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# zoom-chat-parser: Copyright © 2024-2025 Benjamin Holt - MIT License
from collections import deque
from dataclasses import dataclass, field
from pprint import pp
from typing import Optional

from helpers import match_test, pretty
from smallmachine import StateMachine
#####
### Input Fixture ###
# Sample chat transcript, parsed when no chat file is given on the command line.
# Each entry is a "HH:MM:SS From <sender> to <recipient>:" header followed by its
# body: plain text, a 'Replying to "..."' line plus the reply text, or a
# 'Reacted to "..." with <emoji>' / 'Removed a <emoji> reaction from "..."' notice.
# NOTE(review): the parser's body-line patterns all begin with \s+, so body lines
# are expected to be indented - confirm the leading whitespace inside this literal
# is intact.
chat_lines = """
15:01:39 From Michael M to Everyone:
When are we going to have AI run team summit?
15:01:45 From Jared R to Everyone:
Anyone still remember NFTs?
15:01:47 From Aaron S to Everyone:
Replying to "When are we going to..."
We already do
15:01:53 From Aaron S to Everyone:
Replying to "When are we going to..."
Gabe.ai
15:01:53 From Michael M to Everyone:
Reacted to "We already do" with 😂
15:03:00 From Matt T to Ben H:
Hi BenH!
15:04:53 From Aaron S to Ben H:
Replying to "When are we going to..."
Right??
15:07:18 From Becca S to Everyone:
Reacted to "Anyone still remember ..." with 👏🏼
15:07:20 From Becca S to Everyone:
Removed a 👏🏼 reaction from "Anyone still remember ..."
"""
#####
### Data Model ###
@dataclass
class ChatMessage:
    """One message from a chat transcript, plus the replies and reactions attached to it.

    The three header fields are required; the remaining fields start empty
    and are filled in as the body lines, replies and reactions are parsed.
    """

    time_str: str  # Timestamp text as captured from the header, e.g. "15:01:39"
    sender: str
    recipient: str
    message_lines: list[str] = field(default_factory=list)
    replies: list["ChatMessage"] = field(default_factory=list)
    # Primary prefix is enough to identify the message it's replying to; None for a non-reply
    primary_message_prefix: Optional[str] = None
    # primary_message: "ChatMessage" = None  # Could switch to this if we need to navigate up the tree
    # Each entry is a (reaction, sender) pair so a later removal can be matched to its sender
    reactions: list[tuple[str, str]] = field(default_factory=list)
#####
### Chat Parser ###
class ChatParser:
    """Parse chat transcript lines into threaded ``ChatMessage`` objects.

    Each line is handed to a ``StateMachine`` whose rules recognise message
    headers, body lines, replies, and reaction add/remove notices. Call the
    instance with an iterable of lines, then read ``threads`` for the result.
    """

    def __init__(self):
        # Store messages LIFO to make finding previous message easy for replies and reactions
        self.message_stack = deque()
        # Message whose header was read most recently and is not yet on the stack;
        # None before the first header and right after a reaction notice.
        self.current_message = None

        blank_test = match_test(r"\s*$")

        header_test = match_test(r"(?P<time_str>(?P<hour>\d{2}):(?P<minute>\d{2}):(?P<second>\d{2})) From (?P<from>.+?) to (?P<to>.+?):")

        @pretty
        def begin_message(result, **_):
            # Push any previous message onto the stack and start a new one
            if self.current_message:
                self.message_stack.appendleft(self.current_message)
            self.current_message = ChatMessage(
                time_str=result.group("time_str"),
                sender=result.group("from"),
                recipient=result.group("to"),
            )

        reaction_test = match_test(r'\s+Reacted to "(?P<prefix>.+?)([.]{3})?" with (?P<reaction>.+)')

        @pretty
        def add_reaction(result, **_):
            prefix = result.group("prefix")
            reaction_pair = (result.group("reaction"), self.current_message.sender)  # Store the sender for possible removals
            previous_message = self.find_previous_message(prefix)
            # The same sender reacting twice with the same reaction is recorded once
            if previous_message and reaction_pair not in previous_message.reactions:
                previous_message.reactions.append(reaction_pair)
            self.current_message = None  # Reactions don't contain actual messages

        remove_reaction_test = match_test(r'\s+Removed a (?P<reaction>.+?) reaction from "(?P<prefix>.+?)([.]{3})?"')

        @pretty
        def remove_reaction(result, **_):
            prefix = result.group("prefix")
            reaction_pair = (result.group("reaction"), self.current_message.sender)  # Only remove reactions from the same sender
            previous_message = self.find_previous_message(prefix)
            if previous_message and reaction_pair in previous_message.reactions:
                previous_message.reactions.remove(reaction_pair)
            # Ignore mismatched reaction removals
            self.current_message = None  # Reactions don't contain actual messages

        reply_test = match_test(r'\s+Replying to "(?P<prefix>.+?)([.]{3})?"')

        @pretty
        def add_reply(result, **_):
            prefix = result.group("prefix")
            self.current_message.primary_message_prefix = prefix
            primary_message = self.find_previous_message(prefix)
            # NOTE(review): when no primary is found the reply is attached nowhere,
            # and `threads` also skips it because its prefix is set - confirm intended.
            if primary_message:
                # self.current_message.primary_message = primary_message
                primary_message.replies.append(self.current_message)

        message_line_test = match_test(r"\s+(?P<line>.*)")

        @pretty
        def add_message_line(result, **_):
            text = result.group("line")
            self.current_message.message_lines.append(text)

        # Each rule is (name, test, action, next state). `...` as the next state
        # presumably means "stay in the current state" - verify against smallmachine.
        rules = {
            "start": [
                ("header", header_test, begin_message, "message"),
                ("blank", blank_test, None, ...),
            ],
            "message": [
                # Reactions do not have any other lines, back to the top state
                ("reaction", reaction_test, add_reaction, "start"),
                ("remove_reaction", remove_reaction_test, remove_reaction, "start"),
                ("reply", reply_test, add_reply, "reply_blank"),
                ("line", message_line_test, add_message_line, "message_lines"),
            ],
            "reply_blank": [
                # Blank line between "Replying to" and message is part of the format, not part of the message
                ("blank", blank_test, None, "message_lines"),
                ("line", message_line_test, add_message_line, "message_lines"),  # Found a case where reply did not have a blank line
            ],
            "message_lines": [
                ("header", header_test, begin_message, "message"),
                ("line", message_line_test, add_message_line, ...),
                ("blank", blank_test, None, ...),  # Ignore blank lines that are not part of the message (i.e. no indent)
            ],
        }
        self.parser = StateMachine(rules, "start", tracer=True)

    def find_previous_message(self, prefix):
        """Return the most recent stacked message whose first line starts with *prefix*.

        The stack is searched newest-first; messages with no body lines are
        skipped. Returns None when nothing matches.
        """
        for m in self.message_stack:
            if m.message_lines and m.message_lines[0].startswith(prefix):
                return m
        return None

    @property
    def threads(self):
        """Return the top-level messages (thread starters), oldest first.

        Replies are not listed separately; they are reachable through the
        ``replies`` list of the message they answer. The message still held
        in ``current_message`` (not yet pushed onto the stack) is included so
        the final message of a transcript is not dropped.

        Returns an empty tuple when there are no messages at all, otherwise
        a generator.
        """
        # The stack is newest-first, so reverse it; use this instance's own
        # stack rather than any module-level parser object.
        messages = list(reversed(self.message_stack))
        if self.current_message is not None:
            messages.append(self.current_message)
        if not messages:
            return ()
        return (
            m for m in messages
            # Threaded messages are included in the primary message
            if m.primary_message_prefix is None
        )

    def __call__(self, lines):
        """Feed every line of *lines* (any iterable of strings) to the state machine, in order."""
        for line in lines:
            self.parser(line)
#####
### Main ###
if __name__ == "__main__":
    import sys

    chat_parser = ChatParser()
    # pp(chat_parser.parser.status_dict())
    # Parse the file named by the first command-line argument, or fall back
    # to the built-in sample transcript when none is given.
    chat_file = sys.argv[1] if len(sys.argv) > 1 else None
    if chat_file:
        # Explicit encoding: transcripts contain emoji, which a non-UTF-8
        # platform default would fail to decode (assumes saved chats are UTF-8).
        with open(chat_file, encoding="utf-8") as f:
            chat_parser(f)
    else:
        chat_parser(chat_lines.split("\n"))
    threads = chat_parser.threads
    pp(tuple(threads))
#####