fluffychat/lib/pangea/choreographer/models/igc_text_data_model.dart

import 'dart:developer';
import 'package:flutter/foundation.dart';
import 'package:flutter/material.dart';
import 'package:collection/collection.dart';
import 'package:matrix/matrix.dart';
import 'package:fluffychat/pangea/choreographer/models/language_detection_model.dart';
import 'package:fluffychat/pangea/choreographer/models/pangea_match_model.dart';
import 'package:fluffychat/pangea/choreographer/models/span_card_model.dart';
import 'package:fluffychat/pangea/choreographer/models/span_data.dart';
import 'package:fluffychat/pangea/choreographer/repo/language_detection_repo.dart';
import 'package:fluffychat/pangea/common/utils/error_handler.dart';
import 'package:fluffychat/pangea/events/event_wrappers/pangea_representation_event.dart';
import 'package:fluffychat/pangea/events/models/pangea_token_model.dart';
import 'package:fluffychat/pangea/events/models/representation_content_model.dart';
import 'package:fluffychat/pangea/learning_settings/constants/language_constants.dart';
import '../../common/constants/model_keys.dart';
// import 'package:language_tool/language_tool.dart';
class IGCTextData {
  LanguageDetectionResponse detections;

  String originalInput;
  String? fullTextCorrection;
  List<PangeaToken> tokens;
  List<PangeaMatch> matches;
  String userL1;
  String userL2;
  bool enableIT;
  bool enableIGC;
  bool loading = false;

  IGCTextData({
    required this.detections,
    required this.originalInput,
    required this.fullTextCorrection,
    required this.tokens,
    required this.matches,
    required this.userL1,
    required this.userL2,
    required this.enableIT,
    required this.enableIGC,
  });
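  /// The incoming `detections` value may arrive in either of two shapes,
  /// roughly as sketched below (values are illustrative; the inner detection
  /// fields are elided):
  ///
  /// ```json
  /// // legacy shape: a bare list of detections
  /// {"detections": [ ... ], "original_input": "hola", "tokens": [ ... ]}
  ///
  /// // current shape: a full LanguageDetectionResponse map
  /// {"detections": {"detections": [ ... ], "full_text": "hola"}, "original_input": "hola", "tokens": [ ... ]}
  /// ```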
  factory IGCTextData.fromJson(Map<String, dynamic> json) {
    // changing this to allow for use of the LanguageDetectionResponse methods
    // TODO - change API after we're sure all clients are updated. not urgent.
    final LanguageDetectionResponse detections =
        json[_detectionsKey] is Iterable
            ? LanguageDetectionResponse.fromJson({
                "detections": json[_detectionsKey],
                "full_text": json["original_input"],
              })
            : LanguageDetectionResponse.fromJson(
                json[_detectionsKey] as Map<String, dynamic>,
              );

    return IGCTextData(
      tokens: (json[_tokensKey] as Iterable)
          .map<PangeaToken>(
            (e) => PangeaToken.fromJson(e as Map<String, dynamic>),
          )
          .toList()
          .cast<PangeaToken>(),
      matches: json[_matchesKey] != null
          ? (json[_matchesKey] as Iterable)
              .map<PangeaMatch>(
                (e) {
                  return PangeaMatch.fromJson(e as Map<String, dynamic>);
                },
              )
              .toList()
              .cast<PangeaMatch>()
          : [],
      detections: detections,
      originalInput: json["original_input"],
      fullTextCorrection: json["full_text_correction"],
      userL1: json[ModelKey.userL1],
      userL2: json[ModelKey.userL2],
      enableIT: json["enable_it"],
      enableIGC: json["enable_igc"],
    );
  }
  factory IGCTextData.fromRepresentationEvent(
    RepresentationEvent event,
    String userL1,
    String userL2,
  ) {
    final PangeaRepresentation content = event.content;
    final List<PangeaToken> tokens = event.tokens ?? [];
    final List<PangeaMatch> matches = event.choreo?.choreoSteps
            .map((step) => step.acceptedOrIgnoredMatch)
            .whereType<PangeaMatch>()
            .toList() ??
        [];

    String originalInput = content.text;
    if (matches.isNotEmpty) {
      originalInput = matches.first.match.fullText;
    }

    return IGCTextData(
      detections: LanguageDetectionResponse(
        detections: [
          LanguageDetection(langCode: content.langCode, confidence: 1),
        ],
        fullText: content.text,
      ),
      originalInput: originalInput,
      fullTextCorrection: content.text,
      tokens: tokens,
      matches: matches,
      userL1: userL1,
      userL2: userL2,
      enableIT: true,
      enableIGC: true,
    );
  }
  static const String _tokensKey = "tokens";
  static const String _matchesKey = "matches";
  static const String _detectionsKey = "detections";

  Map<String, dynamic> toJson() => {
        _detectionsKey: detections.toJson(),
        "original_input": originalInput,
        "full_text_correction": fullTextCorrection,
        _tokensKey: tokens.map((e) => e.toJson()).toList(),
        _matchesKey: matches.map((e) => e.toJson()).toList(),
        ModelKey.userL1: userL1,
        ModelKey.userL2: userL2,
        "enable_it": enableIT,
        "enable_igc": enableIGC,
      };
  /// The language detected for the input.
  ///
  /// Currently this simply returns the first detection's language code, or
  /// [LanguageKeys.unknownLanguage] if there are none. The stricter selection
  /// below is disabled for now: use the highest validated detection from
  /// [LanguageDetectionResponse.highestValidatedDetection] unless both IGC
  /// and IT have run and produced no matches, in which case the threshold is
  /// relaxed and the highest-confidence detection is used.
  String get detectedLanguage {
    return detections.detections.firstOrNull?.langCode ??
        LanguageKeys.unknownLanguage;
    // if (!(enableIGC && enableIT) || matches.isNotEmpty) {
    //   return detections.highestValidatedDetection().langCode;
    // } else {
    //   return detections.highestConfidenceDetection.langCode;
    // }
  }
  /// Reconstructs the full text based on an accepted match and updates the
  /// offsets in the remaining matches to reflect the change.
  /// Open question: should existing matches that overlap with the accepted
  /// one be removed?
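  ///
  /// Offset bookkeeping, as a rough illustration (hypothetical values): if a
  /// match at offset 6 with length 4 ("helo") is replaced by "hello"
  /// (length 5), every token and match that starts after offset 6 has its
  /// offset shifted by `5 - 4 = 1`.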
  void acceptReplacement(
    int matchIndex,
    int choiceIndex,
  ) async {
    // should be already added to choreoRecord
    // TODO - that should be done in the same function to avoid error potential
    final PangeaMatch pangeaMatch = matches[matchIndex];

    if (pangeaMatch.match.choices == null) {
      debugger(when: kDebugMode);
      ErrorHandler.logError(
        m: "pangeaMatch.match.choices is null in acceptReplacement",
        data: {
          "match": pangeaMatch.match.toJson(),
        },
      );
      return;
    }

    final SpanChoice replacement = pangeaMatch.match.choices![choiceIndex];

    originalInput = originalInput.replaceRange(
      pangeaMatch.match.offset,
      pangeaMatch.match.offset + pangeaMatch.match.length,
      replacement.value,
    );

    int startIndex;
    int endIndex;

    // replace the tokens that are part of the match
    // with the tokens in the replacement
    // start is inclusive
    try {
      startIndex = tokenIndexByOffset(pangeaMatch.match.offset);
      // end is exclusive, hence the +1
      // use pangeaMatch.matchContent.trim().length instead of
      // pangeaMatch.match.length since pangeaMatch.match.length may include
      // leading/trailing spaces
      endIndex = tokenIndexByOffset(
            pangeaMatch.match.offset + pangeaMatch.matchContent.trim().length,
          ) +
          1;
    } catch (err, s) {
      matches.removeAt(matchIndex);
      for (final match in matches) {
        match.match.fullText = originalInput;
        if (match.match.offset > pangeaMatch.match.offset) {
          match.match.offset +=
              replacement.value.length - pangeaMatch.match.length;
        }
      }
      ErrorHandler.logError(
        e: err,
        s: s,
        data: {
          "cursorOffset": pangeaMatch.match.offset,
          "match": pangeaMatch.match.toJson(),
          "tokens": tokens.map((e) => e.toJson()).toString(),
        },
      );
      return;
    }

    // for all tokens after the replacement, update their offsets
    for (int i = endIndex; i < tokens.length; i++) {
      tokens[i].text.offset +=
          replacement.value.length - pangeaMatch.match.length;
    }

    // clone the list for debugging purposes
    final List<PangeaToken> newTokens = List.from(tokens);

    // replace the tokens in the list
    newTokens.replaceRange(startIndex, endIndex, replacement.tokens);

    final String newFullText = PangeaToken.reconstructText(newTokens);
    if (newFullText.trim() != originalInput.trim() && kDebugMode) {
      PangeaToken.reconstructText(newTokens, debugWalkThrough: true);
      ErrorHandler.logError(
        m: "reconstructed text not working",
        s: StackTrace.current,
        data: {
          "originalInput": originalInput,
          "newFullText": newFullText,
          "match": pangeaMatch.match.toJson(),
        },
      );
    }

    tokens = newTokens;

    // update offsets in existing matches to reflect the change
    // Question - remove matches that overlap with the accepted one?
    // see case of "quiero ver un fix"
    matches.removeAt(matchIndex);
    for (final match in matches) {
      match.match.fullText = originalInput;
      if (match.match.offset > pangeaMatch.match.offset) {
        match.match.offset +=
            replacement.value.length - pangeaMatch.match.length;
      }
    }
  }
  void removeMatchByOffset(int offset) {
    final int index = getTopMatchIndexForOffset(offset);
    if (index != -1) {
      matches.removeAt(index);
    }
  }
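  /// Returns the index of the token whose span contains [cursorOffset];
  /// throws if no token contains it.
  ///
  /// A rough illustration (hypothetical positions, assuming `start` is the
  /// token's first character offset and `end` is `start + length`): for
  /// "Yo hablo" tokenized as "Yo" (start 0, end 2) and "hablo" (start 3,
  /// end 8), a cursor offset of 5 falls inside "hablo", so index 1 is
  /// returned.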
  int tokenIndexByOffset(int cursorOffset) {
    final tokenIndex = tokens.indexWhere(
      (token) => token.start <= cursorOffset && cursorOffset <= token.end,
    );
    if (tokenIndex < 0) {
      throw "No token found for cursor offset";
    }
    return tokenIndex;
  }
  List<int> matchIndicesByOffset(int offset) {
    final List<int> matchesForOffset = [];
    for (final (index, match) in matches.indexed) {
      if (match.isOffsetInMatchSpan(offset)) {
        matchesForOffset.add(index);
      }
    }
    return matchesForOffset;
  }

  int getTopMatchIndexForOffset(int offset) {
    final List<int> matchesForToken = matchIndicesByOffset(offset);
    final int matchIndex = matchesForToken.indexWhere((matchIndex) {
      final match = matches[matchIndex];
      return (enableIT && (match.isITStart || match.isl1SpanMatch)) ||
          (enableIGC && match.isGrammarMatch);
    });
    if (matchIndex == -1) return -1;
    return matchesForToken[matchIndex];
  }

  PangeaMatch? getTopMatchForToken(PangeaToken token) {
    final int topMatchIndex = getTopMatchIndexForOffset(token.text.offset);
    if (topMatchIndex == -1) return null;
    return matches[topMatchIndex];
  }
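  /// Returns the number of characters between the end of the token at
  /// [tokenIndex] and the start of the next token, or the distance to the end
  /// of [originalInput] for the last token.
  ///
  /// A rough illustration (hypothetical offsets): for "Hola mundo" tokenized
  /// as "Hola" (offset 0, end 4) and "mundo" (offset 5), the spacing after
  /// the first token is `5 - 4 = 1`.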
  int getAfterTokenSpacingByIndex(int tokenIndex) {
    final int endOfToken = tokens[tokenIndex].end;
    if (tokenIndex + 1 < tokens.length) {
      final spaceBetween = tokens[tokenIndex + 1].text.offset - endOfToken;
      if (spaceBetween < 0) {
        ErrorHandler.logError(
          m: "weird token lengths for ${tokens[tokenIndex].text.content} and ${tokens[tokenIndex + 1].text.content}",
          data: {
            "fullText": originalInput,
            "tokens": tokens.map((e) => e.toJson()).toString(),
          },
        );
        return 0;
      }
      return spaceBetween;
    } else {
      return originalInput.length - endOfToken;
    }
  }

  static TextStyle underlineStyle(Color color) => TextStyle(
        decoration: TextDecoration.underline,
        decorationColor: color,
        decorationThickness: 5,
      );

  TextSpan getSpanItem({
    required int start,
    required int end,
    TextStyle? style,
  }) {
    return TextSpan(
      text: originalInput.characters.getRange(start, end).toString(),
      style: style,
    );
  }
  //PTODO - handle multitoken spans
  /// Returns a list of [TextSpan]s used to display the text in the input field
  /// with the appropriate styling for each error match.
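  ///
  /// A minimal usage sketch (the surrounding widget, style, and argument
  /// values here are hypothetical, not part of this model):
  ///
  /// ```dart
  /// final spans = igcTextData.constructTokenSpan(
  ///   context: context,
  ///   defaultStyle: Theme.of(context).textTheme.bodyMedium,
  ///   spanCardModel: null,
  ///   handleClick: false,
  ///   transformTargetId: "input_field",
  ///   room: room,
  /// );
  /// final widget = RichText(text: TextSpan(children: spans));
  /// ```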
  List<TextSpan> constructTokenSpan({
    required BuildContext context,
    TextStyle? defaultStyle,
    required SpanCardModel? spanCardModel,
    required bool handleClick,
    required String transformTargetId,
    required Room room,
  }) {
    final List<TextSpan> items = [];
    if (loading) {
      return [
        TextSpan(
          text: originalInput,
          style: defaultStyle,
        ),
      ];
    }

    final List<List<int>> matchRanges = matches
        .map(
          (match) => [
            match.match.offset,
            match.match.length + match.match.offset,
          ],
        )
        .toList();

    // create a pointer to the current index in the original input
    // and iterate until the pointer has reached the end of the input
    int currentIndex = 0;
    while (currentIndex < originalInput.characters.length) {
      // check if the pointer is at a match, and if so, get the index of the match
      final int matchIndex = matchRanges.indexWhere(
        (range) => currentIndex >= range[0] && currentIndex < range[1],
      );
      final bool inMatch = matchIndex != -1;

      if (inMatch) {
        // if the pointer is in a match, then add that match to items
        // and then move the pointer to the end of the match range
        final PangeaMatch match = matches[matchIndex];
        items.add(
          getSpanItem(
            start: match.match.offset,
            end: match.match.offset + match.match.length,
            style: match.textStyle(defaultStyle),
          ),
        );
        currentIndex = match.match.offset + match.match.length;
      } else {
        // otherwise, if the pointer is not at a match, then add all the text
        // until the next match (or, if there is no next match, the end of the
        // text) to items and move the pointer to the start of the next match
        final int nextIndex = matchRanges
                .firstWhereOrNull(
                  (range) => range[0] > currentIndex,
                )
                ?.first ??
            originalInput.characters.length;
        items.add(
          getSpanItem(
            start: currentIndex,
            end: nextIndex,
            style: defaultStyle,
          ),
        );
        currentIndex = nextIndex;
      }
    }

    return items;
  }
  List<PangeaToken> matchTokens(int matchIndex) {
    if (matchIndex >= matches.length) {
      return [];
    }

    final PangeaMatch match = matches[matchIndex];
    final List<PangeaToken> tokensForMatch = [];
    for (final token in tokens) {
      if (match.isOffsetInMatchSpan(token.text.offset)) {
        tokensForMatch.add(token);
      }
    }
    return tokensForMatch;
  }
}