forked from YonkoSam/whatsapp-python-chatbot
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmessage_splitter.py
More file actions
151 lines (129 loc) · 5.85 KB
/
message_splitter.py
File metadata and controls
151 lines (129 loc) · 5.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
import re
"""
message_splitter.py - Implementation of the message splitting functionality
"""
def split_message(text, max_lines=3, max_chars_per_line=100):
    """
    Public entry point for splitting a message into chunks.

    Kept as a thin alias for backwards compatibility; every argument is
    forwarded unchanged to split_message_impl, whose result is returned.
    """
    return split_message_impl(
        text,
        max_lines=max_lines,
        max_chars_per_line=max_chars_per_line,
    )
def _wrap_paragraph(paragraph, max_chars_per_line):
    """
    Yield the output lines for one newline-free paragraph.

    Args:
        paragraph: A single line of input text (contains no '\n').
        max_chars_per_line: Maximum characters per output line; must be >= 1.

    Yields:
        '' for a whitespace-only paragraph, the paragraph itself when it
        already fits, otherwise word-wrapped lines. Words longer than the
        limit are cut into fixed-size pieces, each on its own line.
    """
    if not paragraph.strip():
        yield ''
        return
    if len(paragraph) <= max_chars_per_line:
        # Short paragraphs are passed through untouched (whitespace included).
        yield paragraph
        return

    line_words = []
    line_length = 0
    for word in paragraph.split():
        if len(word) > max_chars_per_line:
            # Emit whatever was accumulated, then hard-split the long word.
            if line_words:
                yield ' '.join(line_words)
                line_words = []
                line_length = 0
            for start in range(0, len(word), max_chars_per_line):
                yield word[start:start + max_chars_per_line]
            continue

        # One extra character for the joining space when the line is not empty.
        needed = len(word) + (1 if line_words else 0)
        if line_length + needed > max_chars_per_line:
            # line_words cannot be empty here: a lone word that fits the
            # limit never overflows an empty line.
            yield ' '.join(line_words)
            line_words = [word]
            line_length = len(word)
        else:
            line_words.append(word)
            line_length += needed

    if line_words:
        yield ' '.join(line_words)


def split_message_impl(text, max_lines=3, max_chars_per_line=100):
    """
    Split a long message into smaller chunks for better WhatsApp readability.

    The text is normalized (escaped "\\n" sequences become real newlines,
    CRLF becomes LF, lines holding only a backslash are removed), every
    resulting line is wrapped to max_chars_per_line, and the wrapped lines
    are grouped max_lines at a time into chunks joined with '\n'.

    Args:
        text: The text to split
        max_lines: Maximum lines per message chunk (values below 1 are
            treated as 1)
        max_chars_per_line: Maximum characters per line (values below 1 are
            treated as 1)

    Returns:
        List of message chunks ready to send. Chunks that would contain only
        whitespace are omitted, so the list is empty for blank input.
    """
    if not text:
        return []

    # Guard against non-positive limits: a zero width would make range()
    # raise ValueError and a negative one would silently discard all text.
    max_lines = max(1, max_lines)
    max_chars_per_line = max(1, max_chars_per_line)

    # Convert escaped newlines and normalize line endings
    normalized_text = text.replace('\\n', '\n').replace('\r\n', '\n')

    # Remove standalone backslashes (middle, start and end of the text)
    normalized_text = re.sub(r'\n\s*\\\s*\n', '\n', normalized_text)
    normalized_text = re.sub(r'^\s*\\\s*\n', '', normalized_text)
    normalized_text = re.sub(r'\n\s*\\\s*$', '', normalized_text)

    # Flatten every paragraph into its wrapped output lines.
    lines = []
    for paragraph in normalized_text.split('\n'):
        lines.extend(_wrap_paragraph(paragraph, max_chars_per_line))

    # Group consecutive lines into chunks of at most max_lines lines.
    chunks = []
    for start in range(0, len(lines), max_lines):
        chunk = '\n'.join(lines[start:start + max_lines])
        # A chunk made only of blank lines is not a sendable message.
        if chunk.strip():
            chunks.append(chunk)
    return chunks