fluffychat/lib/pangea/choreographer/models/igc_text_data_model.dart

import 'dart:developer';
import 'package:flutter/foundation.dart';
import 'package:flutter/material.dart';
import 'package:collection/collection.dart';
import 'package:matrix/matrix.dart';
import 'package:fluffychat/pangea/choreographer/models/language_detection_model.dart';
import 'package:fluffychat/pangea/choreographer/models/pangea_match_model.dart';
import 'package:fluffychat/pangea/choreographer/models/span_card_model.dart';
import 'package:fluffychat/pangea/choreographer/models/span_data.dart';
import 'package:fluffychat/pangea/choreographer/repo/language_detection_repo.dart';
import 'package:fluffychat/pangea/common/utils/error_handler.dart';
import 'package:fluffychat/pangea/events/event_wrappers/pangea_representation_event.dart';
import 'package:fluffychat/pangea/events/models/pangea_token_model.dart';
import 'package:fluffychat/pangea/events/models/representation_content_model.dart';
import 'package:fluffychat/pangea/learning_settings/constants/language_constants.dart';
import '../../common/constants/model_keys.dart';
// import 'package:language_tool/language_tool.dart';
class IGCTextData {
  LanguageDetectionResponse detections;

  String originalInput;
  String? fullTextCorrection;
  List<PangeaToken> tokens;
  List<PangeaMatch> matches;
  String userL1;
  String userL2;
  bool enableIT;
  bool enableIGC;
  bool loading = false;

  IGCTextData({
    required this.detections,
    required this.originalInput,
    required this.fullTextCorrection,
    required this.tokens,
    required this.matches,
    required this.userL1,
    required this.userL2,
    required this.enableIT,
    required this.enableIGC,
  });
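  /// The incoming `detections` value may arrive in either of two shapes,
  /// roughly as sketched below (values are illustrative; the inner detection
  /// fields are elided):
  ///
  /// ```json
  /// // legacy shape: a bare list of detections
  /// {"detections": [ ... ], "original_input": "hola", "tokens": [ ... ]}
  ///
  /// // current shape: a full LanguageDetectionResponse map
  /// {"detections": {"detections": [ ... ], "full_text": "hola"}, "original_input": "hola", "tokens": [ ... ]}
  /// ```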
  factory IGCTextData.fromJson(Map<String, dynamic> json) {
    // changing this to allow for use of the LanguageDetectionResponse methods
    // TODO - change API after we're sure all clients are updated. not urgent.
    final LanguageDetectionResponse detections =
        json[_detectionsKey] is Iterable
            ? LanguageDetectionResponse.fromJson({
                "detections": json[_detectionsKey],
                "full_text": json["original_input"],
              })
            : LanguageDetectionResponse.fromJson(
                json[_detectionsKey] as Map<String, dynamic>,
              );

    return IGCTextData(
      tokens: (json[_tokensKey] as Iterable)
          .map<PangeaToken>(
            (e) => PangeaToken.fromJson(e as Map<String, dynamic>),
          )
          .toList()
          .cast<PangeaToken>(),
      matches: json[_matchesKey] != null
          ? (json[_matchesKey] as Iterable)
              .map<PangeaMatch>(
                (e) {
                  return PangeaMatch.fromJson(e as Map<String, dynamic>);
                },
              )
              .toList()
              .cast<PangeaMatch>()
          : [],
      detections: detections,
      originalInput: json["original_input"],
      fullTextCorrection: json["full_text_correction"],
      userL1: json[ModelKey.userL1],
      userL2: json[ModelKey.userL2],
      enableIT: json["enable_it"],
      enableIGC: json["enable_igc"],
    );
  }
  factory IGCTextData.fromRepresentationEvent(
    RepresentationEvent event,
    String userL1,
    String userL2,
  ) {
    final PangeaRepresentation content = event.content;
    final List<PangeaToken> tokens = event.tokens ?? [];
    final List<PangeaMatch> matches = event.choreo?.choreoSteps
            .map((step) => step.acceptedOrIgnoredMatch)
            .whereType<PangeaMatch>()
            .toList() ??
        [];

    String originalInput = content.text;
    if (matches.isNotEmpty) {
      originalInput = matches.first.match.fullText;
    }

    return IGCTextData(
      detections: LanguageDetectionResponse(
        detections: [
          LanguageDetection(langCode: content.langCode, confidence: 1),
        ],
        fullText: content.text,
      ),
      originalInput: originalInput,
      fullTextCorrection: content.text,
      tokens: tokens,
      matches: matches,
      userL1: userL1,
      userL2: userL2,
      enableIT: true,
      enableIGC: true,
    );
  }
  static const String _tokensKey = "tokens";
  static const String _matchesKey = "matches";
  static const String _detectionsKey = "detections";

  Map<String, dynamic> toJson() => {
        _detectionsKey: detections.toJson(),
        "original_input": originalInput,
        "full_text_correction": fullTextCorrection,
        _tokensKey: tokens.map((e) => e.toJson()).toList(),
        _matchesKey: matches.map((e) => e.toJson()).toList(),
        ModelKey.userL1: userL1,
        ModelKey.userL2: userL2,
        "enable_it": enableIT,
        "enable_igc": enableIGC,
      };
  /// The language detected for the input.
  ///
  /// Currently this simply returns the first detection's language code, or
  /// [LanguageKeys.unknownLanguage] if there are none. The stricter selection
  /// below is disabled for now: use the highest validated detection from
  /// [LanguageDetectionResponse.highestValidatedDetection] unless both IGC
  /// and IT have run and produced no matches, in which case the threshold is
  /// relaxed and the highest-confidence detection is used.
  String get detectedLanguage {
    return detections.detections.firstOrNull?.langCode ??
        LanguageKeys.unknownLanguage;
    // if (!(enableIGC && enableIT) || matches.isNotEmpty) {
    //   return detections.highestValidatedDetection().langCode;
    // } else {
    //   return detections.highestConfidenceDetection.langCode;
    // }
  }
  /// Reconstructs the full text based on an accepted match and updates the
  /// offsets in the remaining matches to reflect the change.
  /// Open question: should existing matches that overlap with the accepted
  /// one be removed?
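  ///
  /// Offset bookkeeping, as a rough illustration (hypothetical values): if a
  /// match at offset 6 with length 4 ("helo") is replaced by "hello"
  /// (length 5), every token and match that starts after offset 6 has its
  /// offset shifted by `5 - 4 = 1`.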
  void acceptReplacement(
    int matchIndex,
    int choiceIndex,
  ) async {
    // should be already added to choreoRecord
    // TODO - that should be done in the same function to avoid error potential
    final PangeaMatch pangeaMatch = matches[matchIndex];

    if (pangeaMatch.match.choices == null) {
      debugger(when: kDebugMode);
      ErrorHandler.logError(
        m: "pangeaMatch.match.choices is null in acceptReplacement",
        data: {
          "match": pangeaMatch.match.toJson(),
        },
      );
      return;
    }

    final SpanChoice replacement = pangeaMatch.match.choices![choiceIndex];

    originalInput = originalInput.replaceRange(
      pangeaMatch.match.offset,
      pangeaMatch.match.offset + pangeaMatch.match.length,
      replacement.value,
    );

    int startIndex;
    int endIndex;

    // replace the tokens that are part of the match
    // with the tokens in the replacement
    // start is inclusive
    try {
      startIndex = tokenIndexByOffset(pangeaMatch.match.offset);
      // end is exclusive, hence the +1
      // use pangeaMatch.matchContent.trim().length instead of
      // pangeaMatch.match.length since pangeaMatch.match.length may include
      // leading/trailing spaces
      endIndex = tokenIndexByOffset(
            pangeaMatch.match.offset + pangeaMatch.matchContent.trim().length,
          ) +
          1;
    } catch (err, s) {
      matches.removeAt(matchIndex);
      for (final match in matches) {
        match.match.fullText = originalInput;
        if (match.match.offset > pangeaMatch.match.offset) {
          match.match.offset +=
              replacement.value.length - pangeaMatch.match.length;
        }
      }
      ErrorHandler.logError(
        e: err,
        s: s,
        data: {
          "cursorOffset": pangeaMatch.match.offset,
          "match": pangeaMatch.match.toJson(),
          "tokens": tokens.map((e) => e.toJson()).toString(),
        },
      );
      return;
    }

    // for all tokens after the replacement, update their offsets
    for (int i = endIndex; i < tokens.length; i++) {
      tokens[i].text.offset +=
          replacement.value.length - pangeaMatch.match.length;
    }

    // clone the list for debugging purposes
    final List<PangeaToken> newTokens = List.from(tokens);

    // replace the tokens in the list
    newTokens.replaceRange(startIndex, endIndex, replacement.tokens);

    final String newFullText = PangeaToken.reconstructText(newTokens);
    if (newFullText.trim() != originalInput.trim() && kDebugMode) {
      PangeaToken.reconstructText(newTokens, debugWalkThrough: true);
      ErrorHandler.logError(
        m: "reconstructed text not working",
        s: StackTrace.current,
        data: {
          "originalInput": originalInput,
          "newFullText": newFullText,
          "match": pangeaMatch.match.toJson(),
        },
      );
    }

    tokens = newTokens;

    // update offsets in existing matches to reflect the change
    // Question - remove matches that overlap with the accepted one?
    // see case of "quiero ver un fix"
    matches.removeAt(matchIndex);
    for (final match in matches) {
      match.match.fullText = originalInput;
      if (match.match.offset > pangeaMatch.match.offset) {
        match.match.offset +=
            replacement.value.length - pangeaMatch.match.length;
      }
    }
  }
  void removeMatchByOffset(int offset) {
    final int index = getTopMatchIndexForOffset(offset);
    if (index != -1) {
      matches.removeAt(index);
    }
  }
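  /// Returns the index of the token whose span contains [cursorOffset];
  /// throws if no token contains it.
  ///
  /// A rough illustration (hypothetical positions, assuming `start` is the
  /// token's first character offset and `end` is `start + length`): for
  /// "Yo hablo" tokenized as "Yo" (start 0, end 2) and "hablo" (start 3,
  /// end 8), a cursor offset of 5 falls inside "hablo", so index 1 is
  /// returned.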
  int tokenIndexByOffset(int cursorOffset) {
    final tokenIndex = tokens.indexWhere(
      (token) => token.start <= cursorOffset && cursorOffset <= token.end,
    );
    if (tokenIndex < 0) {
      throw "No token found for cursor offset";
    }
    return tokenIndex;
  }
  List<int> matchIndicesByOffset(int offset) {
    final List<int> matchesForOffset = [];
    for (final (index, match) in matches.indexed) {
      if (match.isOffsetInMatchSpan(offset)) {
        matchesForOffset.add(index);
      }
    }
    return matchesForOffset;
  }

  int getTopMatchIndexForOffset(int offset) {
    final List<int> matchesForToken = matchIndicesByOffset(offset);
    final int matchIndex = matchesForToken.indexWhere((matchIndex) {
      final match = matches[matchIndex];
      return (enableIT && (match.isITStart || match.isl1SpanMatch)) ||
          (enableIGC && match.isGrammarMatch);
    });
    if (matchIndex == -1) return -1;
    return matchesForToken[matchIndex];
  }

  PangeaMatch? getTopMatchForToken(PangeaToken token) {
    final int topMatchIndex = getTopMatchIndexForOffset(token.text.offset);
    if (topMatchIndex == -1) return null;
    return matches[topMatchIndex];
  }
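  /// Returns the number of characters between the end of the token at
  /// [tokenIndex] and the start of the next token, or the distance to the end
  /// of [originalInput] for the last token.
  ///
  /// A rough illustration (hypothetical offsets): for "Hola mundo" tokenized
  /// as "Hola" (offset 0, end 4) and "mundo" (offset 5), the spacing after
  /// the first token is `5 - 4 = 1`.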
  int getAfterTokenSpacingByIndex(int tokenIndex) {
    final int endOfToken = tokens[tokenIndex].end;
    if (tokenIndex + 1 < tokens.length) {
      final spaceBetween = tokens[tokenIndex + 1].text.offset - endOfToken;
      if (spaceBetween < 0) {
        ErrorHandler.logError(
          m: "weird token lengths for ${tokens[tokenIndex].text.content} and ${tokens[tokenIndex + 1].text.content}",
          data: {
            "fullText": originalInput,
            "tokens": tokens.map((e) => e.toJson()).toString(),
          },
        );
        return 0;
      }
      return spaceBetween;
    } else {
      return originalInput.length - endOfToken;
    }
  }

  static TextStyle underlineStyle(Color color) => TextStyle(
        decoration: TextDecoration.underline,
        decorationColor: color,
        decorationThickness: 5,
      );

  TextSpan getSpanItem({
    required int start,
    required int end,
    TextStyle? style,
  }) {
    return TextSpan(
      text: originalInput.characters.getRange(start, end).toString(),
      style: style,
    );
  }
  //PTODO - handle multitoken spans
  /// Returns a list of [TextSpan]s used to display the text in the input field
  /// with the appropriate styling for each error match.
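  ///
  /// A minimal usage sketch (the surrounding widget, style, and argument
  /// values here are hypothetical, not part of this model):
  ///
  /// ```dart
  /// final spans = igcTextData.constructTokenSpan(
  ///   context: context,
  ///   defaultStyle: Theme.of(context).textTheme.bodyMedium,
  ///   spanCardModel: null,
  ///   handleClick: false,
  ///   transformTargetId: "input_field",
  ///   room: room,
  /// );
  /// final widget = RichText(text: TextSpan(children: spans));
  /// ```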
  List<TextSpan> constructTokenSpan({
    required BuildContext context,
    TextStyle? defaultStyle,
    required SpanCardModel? spanCardModel,
    required bool handleClick,
    required String transformTargetId,
    required Room room,
  }) {
    final List<TextSpan> items = [];
    if (loading) {
      return [
        TextSpan(
          text: originalInput,
          style: defaultStyle,
        ),
      ];
    }

    final List<List<int>> matchRanges = matches
        .map(
          (match) => [
            match.match.offset,
            match.match.length + match.match.offset,
          ],
        )
        .toList();

    // create a pointer to the current index in the original input
    // and iterate until the pointer has reached the end of the input
    int currentIndex = 0;
    while (currentIndex < originalInput.characters.length) {
      // check if the pointer is at a match, and if so, get the index of the match
      final int matchIndex = matchRanges.indexWhere(
        (range) => currentIndex >= range[0] && currentIndex < range[1],
      );
      final bool inMatch = matchIndex != -1;

      if (inMatch) {
        // if the pointer is in a match, then add that match to items
        // and then move the pointer to the end of the match range
        final PangeaMatch match = matches[matchIndex];
        items.add(
          getSpanItem(
            start: match.match.offset,
            end: match.match.offset + match.match.length,
            style: match.textStyle(defaultStyle),
          ),
        );
        currentIndex = match.match.offset + match.match.length;
      } else {
        // otherwise, if the pointer is not at a match, then add all the text
        // until the next match (or, if there is no next match, the end of the
        // text) to items and move the pointer to the start of the next match
        final int nextIndex = matchRanges
                .firstWhereOrNull(
                  (range) => range[0] > currentIndex,
                )
                ?.first ??
            originalInput.characters.length;
        items.add(
          getSpanItem(
            start: currentIndex,
            end: nextIndex,
            style: defaultStyle,
          ),
        );
        currentIndex = nextIndex;
      }
    }

    return items;
  }
  List<PangeaToken> matchTokens(int matchIndex) {
    if (matchIndex >= matches.length) {
      return [];
    }

    final PangeaMatch match = matches[matchIndex];
    final List<PangeaToken> tokensForMatch = [];
    for (final token in tokens) {
      if (match.isOffsetInMatchSpan(token.text.offset)) {
        tokensForMatch.add(token);
      }
    }
    return tokensForMatch;
  }
}