From fe7e5385e814121771d28098854e561366a1c25d Mon Sep 17 00:00:00 2001
From: ggurdin <46800240+ggurdin@users.noreply.github.com>
Date: Wed, 13 Aug 2025 10:50:40 -0400
Subject: [PATCH] fix: group adjacent punctuation tokens with content tokens to
prevent line breaks, added token positions cache (#3713)
---
lib/pages/chat/events/html_message.dart | 23 +++--
.../token_position_model.dart | 87 ++++++++++++++++++-
.../widgets/stt_transcript_tokens.dart | 3 +-
3 files changed, 104 insertions(+), 9 deletions(-)
diff --git a/lib/pages/chat/events/html_message.dart b/lib/pages/chat/events/html_message.dart
index ef945d711..393a1fa46 100644
--- a/lib/pages/chat/events/html_message.dart
+++ b/lib/pages/chat/events/html_message.dart
@@ -14,6 +14,7 @@ import 'package:fluffychat/pages/chat/chat.dart';
import 'package:fluffychat/pangea/events/event_wrappers/pangea_message_event.dart';
import 'package:fluffychat/pangea/events/models/pangea_token_model.dart';
import 'package:fluffychat/pangea/message_token_text/message_token_button.dart';
+import 'package:fluffychat/pangea/message_token_text/token_position_model.dart';
import 'package:fluffychat/pangea/toolbar/enums/reading_assistance_mode_enum.dart';
import 'package:fluffychat/pangea/toolbar/utils/token_rendering_util.dart';
import 'package:fluffychat/pangea/toolbar/widgets/message_selection_overlay.dart';
@@ -157,7 +158,7 @@ class HtmlMessage extends StatelessWidget {
pangeaMessageEvent?.messageDisplayRepresentation?.tokens
?.where(
(t) =>
- !["PUNCT", "SYM"].contains(t.pos) &&
+ !["SYM"].contains(t.pos) &&
!t.lemma.text.contains(RegExp(r'[0-9]')) &&
t.lemma.text.length <= 50,
)
@@ -209,17 +210,25 @@ class HtmlMessage extends StatelessWidget {
}
int position = 0;
- for (final PangeaToken token in tokens ?? []) {
- final String tokenText = token.text.content;
+ final tokenPositions = tokens != null
+ ? TokensUtil.getAdjacentTokenPositions(event.eventId, tokens!)
+ : [];
+
+ for (final TokenPosition tokenPosition in tokenPositions) {
+ final String tokenSpanText = tokens!
+ .sublist(tokenPosition.startIndex, tokenPosition.endIndex + 1)
+ .map((t) => t.text.content)
+ .join();
+
final substringIndex = result.indexWhere(
(string) =>
- string.contains(tokenText) &&
+ string.contains(tokenSpanText) &&
!(string.startsWith('<') && string.endsWith('>')),
position,
);
if (substringIndex == -1) continue;
- int tokenIndex = result[substringIndex].indexOf(tokenText);
+ int tokenIndex = result[substringIndex].indexOf(tokenSpanText);
if (tokenIndex == -1) continue;
final beforeSubstring = result[substringIndex].substring(0, tokenIndex);
@@ -227,7 +236,7 @@ class HtmlMessage extends StatelessWidget {
tokenIndex = beforeSubstring.characters.length;
}
- final int tokenLength = tokenText.characters.length;
+ final int tokenLength = tokenSpanText.characters.length;
final before =
result[substringIndex].characters.take(tokenIndex).toString();
final after = result[substringIndex]
@@ -237,7 +246,7 @@ class HtmlMessage extends StatelessWidget {
result.replaceRange(substringIndex, substringIndex + 1, [
if (before.isNotEmpty) before,
- '$tokenText',
+ '$tokenSpanText',
if (after.isNotEmpty) after,
]);
diff --git a/lib/pangea/message_token_text/token_position_model.dart b/lib/pangea/message_token_text/token_position_model.dart
index 5e8c9b57b..cd5a47355 100644
--- a/lib/pangea/message_token_text/token_position_model.dart
+++ b/lib/pangea/message_token_text/token_position_model.dart
@@ -13,7 +13,82 @@ class TokenPosition {
}
class TokensUtil {
- static List getTokenPositions(
+ /// Cache of calculated adjacent token positions, keyed by event ID (written by [_setCachedTokenPositions]).
+ static final Map _tokenPositionCache = {};
+
+ static const Duration _cacheDuration = Duration(minutes: 1); // Age after which a cached entry is treated as expired.
+
+ static List? _getCachedTokenPositions(String eventID) { // Returns the cached positions for [eventID], or null on a miss or an expired entry.
+ final cacheItem = _tokenPositionCache[eventID];
+ if (cacheItem == null) return null;
+ if (cacheItem.timestamp.isBefore(DateTime.now().subtract(_cacheDuration))) { // Entry was stored more than [_cacheDuration] ago.
+ _tokenPositionCache.remove(eventID); // NOTE(review): only removal visible in this patch; entries for IDs never looked up again stay in the map.
+ return null;
+ }
+
+ return cacheItem.positions;
+ }
+
+ static void _setCachedTokenPositions( // Stores [positions] under [eventID], replacing any existing entry for that ID.
+ String eventID,
+ List positions,
+ ) {
+ _tokenPositionCache[eventID] = _TokenPositionCacheItem(
+ positions,
+ DateTime.now(), // Storage time; compared against [_cacheDuration] on lookup.
+ );
+ }
+
+ /// Returns spans of [tokens]: a token is merged with the contiguous tokens that follow it when it or its successor has pos 'PUNCT'.
+ /// Spans made up only of punctuation are dropped, so the list may leave gaps in the actual message (also for non-token elements)
+ /// and should not be used to fully reconstruct the original message. Results are cached per [eventID] for [_cacheDuration].
+ static List getAdjacentTokenPositions(
+ String eventID,
+ List tokens,
+ ) {
+ final cached = _getCachedTokenPositions(eventID); // NOTE(review): a cache hit ignores [tokens]; confirm callers always pass the same token list for a given event ID.
+ if (cached != null) {
+ return cached;
+ }
+
+ final List positions = [];
+ for (int i = 0; i < tokens.length; i++) {
+ final PangeaToken token = tokens[i];
+
+ PangeaToken? currentToken = token;
+ PangeaToken? nextToken = i < tokens.length - 1 ? tokens[i + 1] : null;
+
+ final isPunct = token.pos == 'PUNCT';
+ final nextIsPunct = nextToken?.pos == 'PUNCT';
+
+ final int startIndex = i; // Inclusive start of this span; i is advanced below to its inclusive end.
+ if (isPunct || nextIsPunct) {
+ while (nextToken != null && currentToken?.end == nextToken.start) { // Extend while the next token starts exactly where the current one ends, whatever its pos.
+ i++;
+ currentToken = nextToken;
+ nextToken = i < tokens.length - 1 ? tokens[i + 1] : null;
+ }
+ }
+
+ final adjacentTokens = tokens.sublist(startIndex, i + 1);
+ if (adjacentTokens.every((t) => t.pos == 'PUNCT')) { // Nothing but punctuation in this span, so no position is emitted.
+ continue;
+ }
+
+ final position = TokenPosition(
+ token: adjacentTokens.firstWhere((t) => t.pos != 'PUNCT'), // The span is represented by its first non-punctuation token.
+ startIndex: startIndex,
+ endIndex: i, // Inclusive index into [tokens].
+ );
+ positions.add(position);
+ }
+
+ _setCachedTokenPositions(eventID, positions);
+ return positions;
+ }
+
+ /// Given a list of tokens, reconstructs an original message, including gaps for non-token elements.
+ static List getGlobalTokenPositions(
List tokens,
) {
final List tokenPositions = [];
@@ -83,3 +158,13 @@ class TokensUtil {
return tokenPositions;
}
}
+
+class _TokenPositionCacheItem { // A cached positions list paired with the time it was stored.
+ final List positions;
+ final DateTime timestamp; // Read by TokensUtil._getCachedTokenPositions to decide whether the entry has expired.
+
+ _TokenPositionCacheItem(
+ this.positions,
+ this.timestamp,
+ );
+}
diff --git a/lib/pangea/toolbar/widgets/stt_transcript_tokens.dart b/lib/pangea/toolbar/widgets/stt_transcript_tokens.dart
index fcea78ddf..e63bcb7df 100644
--- a/lib/pangea/toolbar/widgets/stt_transcript_tokens.dart
+++ b/lib/pangea/toolbar/widgets/stt_transcript_tokens.dart
@@ -37,7 +37,8 @@ class SttTranscriptTokens extends StatelessWidget {
textScaler: TextScaler.noScaling,
text: TextSpan(
style: style ?? DefaultTextStyle.of(context).style,
- children: TokensUtil.getTokenPositions(tokens).map((tokenPosition) {
+ children:
+ TokensUtil.getGlobalTokenPositions(tokens).map((tokenPosition) {
final text = messageCharacters
.skip(tokenPosition.startIndex)
.take(tokenPosition.endIndex - tokenPosition.startIndex)