import React, {
  createContext,
  useState,
  useContext,
  ReactNode,
  useEffect,
  useRef,
} from "react";
import { SpeechAnalysis } from "../../utils/speechAnalysis";
import { useAuth } from "../AuthContext";
import { useLesson } from "../LessonContext";
import * as sdk from "microsoft-cognitiveservices-speech-sdk";
import { getSpeechToken } from "../../utils/api";
import { SpeechTokenType } from "../../types/models";
import {
  cleanString,
  convertOrdinal,
  getAllIndexesOfWord,
  getLastOccurrence,
  numberToWords,
  recognizerEdgeCases,
} from "../../utils/string";
import { useTimer } from "../../hooks/useTimer";
import useSpeechErrors from "../../hooks/useSpeechErrors";

// Largest forward jump (in lesson-phrase word positions) accepted when an
// interim recognition result moves the next-word cursor.
const MAX_WORDS_SKIPPED = 4;
// Delay used between two speech-token refreshes.
const TEN_MINUTES_IN_MS = 10 * 60 * 1000;
// Context published by SpeechProvider; its default value is `undefined`, which
// useSpeech treats as "no provider mounted".
const SpeechContext = createContext<any | undefined>(undefined);

/** Props accepted by SpeechProvider. */
interface SpeechProviderProps {
  /** Subtree rendered inside the speech context provider. */
  children: ReactNode;
}

export const SpeechProvider: React.FC<SpeechProviderProps> = ({ children }) => {
  // Predicate used to derive `speechErrors` from `currentSpeechResult`.
  const { isMispronunciationError } = useSpeechErrors();
  // When true, interim recognition text is split per character instead of per
  // space-separated word (see onRecognizing).
  const [isRecognizingLetters, setIsRecognizingLetters] = useState(false);
  // Gate for the effect that starts recording automatically.
  const [recognitionStarted, setRecognitionStarted] = useState(false);
  // NOTE(review): within this file this state is only ever reset to [].
  const [wordsSpokenInCorrectOrder, setWordsSpokenInCorrectOrder] =
    useState<any>([]);
  // Word results of the current recognition; cleared each time recording starts.
  const [currentSpeechResult, setCurrentSpeechResult] = useState([]);
  // Words from final results whose ErrorType is not "Omission" (appended in
  // onRecognized).
  const [allAnalyzedWords, setAllAnalyzedWords] = useState<any>([]);
  // Every word and every PronunciationAssessment from final results,
  // accumulated in onRecognized.
  const [analyzedWordsWithErrors, setAnalyzedWordsWithErrors] = useState<any>({
    words: [],
    scores: [],
  });

  // Lesson-phrase words that do not appear in `allAnalyzedWords` (derived in an
  // effect).
  const [notAnalyzedWords, setNotAnalyzedWords] = useState<any>([]);
  // Items of `currentSpeechResult` accepted by isMispronunciationError.
  const [speechErrors, setSpeechErrors] = useState<any>([]);
  // Normalized words from interim ("recognizing") events, accumulated.
  const [allPreRecognitionWords, setAllPreRecognitionWords] = useState<any>([]);
  // List of score objects appended by setScoresIncremental.
  const [scores, setScores] = useState<any>([]);
  // Reference text handed to SpeechAnalysis when recording starts.
  const [currentPhrase, setCurrentPhrase] = useState<string>("");
  // Next-word cursor: the word and the lesson-phrase index (-1 = nothing
  // matched yet).
  const [nextWord, setNextWord] = useState<string>("");
  const [nextWordIndex, setNextWordIndex] = useState<number>(-1);
  // NOTE(review): within this file audioUrl is only ever set to null.
  const [audioUrl, setAudioUrl] = useState<string | null>(null);
  const [recording, setRecording] = useState(false);
  // Not read or written by the provider itself; exposed through the context.
  const [hasInterventions, setHasInterventions] = useState(false);
  // Set to false when an existing SpeechAnalysis is closed and to true once
  // recognition has been started (see handleStartRecording).
  const [tempFlag, setTempFlag] = useState(false);

  // Currently active SpeechAnalysis instance, if any.
  const speechAnalysisRef = useRef<SpeechAnalysis | null>(null);
  const [speechToken, setSpeechToken] = useState<SpeechTokenType | null>(null);
  const { userId } = useAuth();
  const { lessonPhrase } = useLesson();

  // Ref mirrors of state. The recognition callbacks are handed to
  // SpeechAnalysis when recording starts, so they read and write these refs
  // rather than relying on values captured at that moment.
  const allPreRecognitionWordsRef = useRef(allPreRecognitionWords);
  const wordsSpokenInCorrectOrderRef = useRef(wordsSpokenInCorrectOrder);
  // Becomes true once a speech token has been stored in state.
  const speechTokenInitialized = useRef(false);
  const nextWordRef = useRef(nextWord);
  const nextWordIndexRef = useRef(nextWordIndex);
  // Cleaned, lower-cased lesson-phrase words (written by onRecognizing).
  const lessonPhraseWordsRef = useRef<any>([]);
  // Lesson-phrase words located after the next-word cursor.
  const remainingWordsRef = useRef<any>([]);

  const timer = useTimer();

  // The effects below copy the latest state values into their ref mirrors
  // after each render in which the corresponding state changed.
  useEffect(() => {
    allPreRecognitionWordsRef.current = allPreRecognitionWords;
  }, [allPreRecognitionWords]);

  useEffect(() => {
    wordsSpokenInCorrectOrderRef.current = wordsSpokenInCorrectOrder;
  }, [wordsSpokenInCorrectOrder]);

  useEffect(() => {
    nextWordRef.current = nextWord;
  }, [nextWord]);

  useEffect(() => {
    nextWordIndexRef.current = nextWordIndex;
  }, [nextWordIndex]);

  // Seed the remaining words with the whole lesson phrase whenever it changes.
  // NOTE(review): this uses the raw phrase, whereas onRecognizing rebuilds the
  // list from cleanString(lessonPhrase) lower-cased — confirm consumers of
  // remainingWordsRef tolerate both forms.
  useEffect(() => {
    remainingWordsRef.current = lessonPhrase.split(" ");
  }, [lessonPhrase]);

  // Derive the lesson-phrase words that have not been analyzed yet: every
  // non-empty word of the cleaned lesson phrase that does not match the
  // cleaned `Word` of any entry in `allAnalyzedWords`.
  useEffect(() => {
    // Build the lookup once instead of re-mapping allAnalyzedWords for every
    // word of the phrase.
    const analyzedWords = new Set<string>(
      allAnalyzedWords.map((item: any) => cleanString(item.Word))
    );

    setNotAnalyzedWords(
      cleanString(lessonPhrase)
        .split(" ")
        .filter((word: string) => !!word && !analyzedWords.has(word))
    );
    // lessonPhrase is read above, so it has to be a dependency; otherwise the
    // list keeps describing the previous lesson until another dependency
    // happens to change.
  }, [allAnalyzedWords, currentPhrase, lessonPhrase]);

  // Keep `speechErrors` equal to the items of the current result that
  // isMispronunciationError accepts.
  useEffect(() => {
    const errors = currentSpeechResult.filter((item: any) => {
      return isMispronunciationError(item);
    });

    setSpeechErrors(errors);
  }, [currentSpeechResult]);

  // (Re)start recording whenever the phrase changes or recognition is switched
  // on, provided a speech token has already been received.
  // NOTE(review): the token flag is a ref, so a token arriving after
  // `recognitionStarted` became true does not re-run this effect.
  // NOTE(review): handleStartRecording is async and its promise is not handled
  // here.
  useEffect(() => {
    if (!recognitionStarted || !speechTokenInitialized.current) return;

    handleStartRecording();
  }, [currentPhrase, recognitionStarted]);

  // Remember that a speech token has been stored at least once.
  useEffect(() => {
    if (speechToken) {
      speechTokenInitialized.current = true;
    }
  }, [speechToken]);

  // A new non-empty phrase resets the next expected word to its first
  // space-separated word.
  useEffect(() => {
    if (!currentPhrase) return;

    setNextWord(currentPhrase.split(" ")[0]);
  }, [currentPhrase]);

  // Each change of the current result offers the active analysis'
  // pronunciationAssessment to the accumulated scores.
  useEffect(() => {
    setScoresIncremental(speechAnalysisRef.current?.pronunciationAssessment);
  }, [currentSpeechResult]);

  /**
   * Fetches a new speech token, stores it in state and forwards the raw token
   * to the active SpeechAnalysis instance, if there is one.
   * Rejects when getSpeechToken() rejects; callers attach their own `.catch`.
   */
  const updateSpeechToken = async () => {
    const { data } = await getSpeechToken();
    setSpeechToken(data);
    speechAnalysisRef.current?.updateSpeechToken(data.token);
  };

  // Id of the single pending token-refresh timeout (null when none is armed).
  const tokenRefreshTimeoutRef = useRef<ReturnType<typeof setTimeout> | null>(
    null
  );

  /** Cancels the pending token refresh, if any. */
  const cancelTokenRefresh = () => {
    if (tokenRefreshTimeoutRef.current !== null) {
      clearTimeout(tokenRefreshTimeoutRef.current);
      tokenRefreshTimeoutRef.current = null;
    }
  };

  /**
   * Arms a token refresh TEN_MINUTES_IN_MS from now, replacing any refresh that
   * is already pending so that at most one timer exists at a time.
   *
   * @returns the id of the newly created timeout.
   */
  const scheduleNextTokenRefresh = () => {
    cancelTokenRefresh();

    const timeoutId = setTimeout(() => {
      updateSpeechToken()
        .catch((err) => {
          console.error("tokenRefresh error:", err);
        })
        .finally(() => {
          // Re-arm only when this timer is still the registered one. If the
          // effect cleanup ran (unmount) or another refresh was scheduled in
          // the meantime, the ref no longer holds this id and re-arming would
          // create a timer nobody can cancel.
          if (tokenRefreshTimeoutRef.current === timeoutId) {
            scheduleNextTokenRefresh();
          }
        });
    }, TEN_MINUTES_IN_MS);

    tokenRefreshTimeoutRef.current = timeoutId;
    return timeoutId;
  };

  // Fetch the first token as soon as a user is known, and keep a periodic
  // refresh armed. The cleanup cancels whichever refresh is pending at that
  // time, including ones re-armed after a failed refresh.
  useEffect(() => {
    if (userId && !speechToken) {
      updateSpeechToken().catch((err) => {
        console.error("updateSpeechToken error:", err);
      });
    }
    scheduleNextTokenRefresh();
    return cancelTokenRefresh;
  }, [userId, speechToken]);

  /**
   * Handler for interim ("recognizing") events.
   *
   * Normalizes the interim text into words, appends them to the accumulated
   * pre-recognition words, and advances the next-word cursor when the most
   * recent word is found in the lesson phrase at or after the cursor and at
   * most MAX_WORDS_SKIPPED positions ahead of it.
   *
   * @param e recognition event; its JSON result property is parsed for `Text`.
   */
  const onRecognizing = (e: any) => {
    const rawJson = e.result.properties.getProperty(
      sdk.PropertyId.SpeechServiceResponse_JsonResult
    );
    const text = JSON.parse(rawJson).Text;

    const toLowerCaseWords = (phrase: string) =>
      phrase.split(" ").map((word: string) => word.toLowerCase());

    const cleanLessonPhrase = cleanString(lessonPhrase);

    // The lesson words are refreshed even when the event carries no text.
    lessonPhraseWordsRef.current = toLowerCaseWords(cleanLessonPhrase);

    if (!text) return;

    const separator = isRecognizingLetters ? "" : " ";
    const normalizeToken = (token: string) =>
      convertOrdinal(numberToWords(recognizerEdgeCases(token)));
    const recognizedWords = text
      .toLowerCase()
      .split(separator)
      .map((token: string) => normalizeToken(token));

    allPreRecognitionWordsRef.current = [
      ...allPreRecognitionWordsRef.current,
      ...recognizedWords,
    ];

    // Words located strictly after the current cursor position (computed
    // before the cursor is moved below).
    remainingWordsRef.current = toLowerCaseWords(
      cleanString(lessonPhrase)
    ).filter(
      (_word: string, position: number) => nextWordIndexRef.current < position
    );

    const latestWord =
      allPreRecognitionWordsRef.current[
        allPreRecognitionWordsRef.current.length - 1
      ];

    const nextWordIndexes = getAllIndexesOfWord({
      lessonPhraseWords: toLowerCaseWords(cleanLessonPhrase),
      word: latestWord,
    });

    // A word occurring several times is disambiguated by getLastOccurrence;
    // otherwise the only (possibly missing) index is used as is.
    let lastOccurrenceIndex = nextWordIndexes[0];
    if (nextWordIndexes.length > 1) {
      lastOccurrenceIndex = getLastOccurrence({
        phrase: cleanLessonPhrase,
        word: latestWord,
        remainingWords: remainingWordsRef.current,
        indexes: nextWordIndexes,
      });
    }

    const isAtOrAfterCursor = lastOccurrenceIndex >= nextWordIndexRef.current;
    if (
      isAtOrAfterCursor &&
      lastOccurrenceIndex - nextWordIndexRef.current <= MAX_WORDS_SKIPPED
    ) {
      const upcomingWord = remainingWordsRef.current[0];

      // State and ref are written together so the next event sees the new
      // cursor without waiting for a render.
      setNextWord(upcomingWord);
      nextWordRef.current = upcomingWord;
      setNextWordIndex(lastOccurrenceIndex);
      nextWordIndexRef.current = lastOccurrenceIndex;
    }

    setAllPreRecognitionWords([...allPreRecognitionWordsRef.current]);
  };

  /**
   * Handler for final ("recognized") events.
   *
   * Takes the first NBest entry of the JSON result and:
   *  - appends its words and PronunciationAssessment to
   *    `analyzedWordsWithErrors`,
   *  - replaces `currentSpeechResult` with the active analysis' current result,
   *    each item annotated with its index in the lesson phrase,
   *  - appends the words whose ErrorType is not "Omission" to
   *    `allAnalyzedWords`.
   *
   * Results without an NBest entry are ignored.
   *
   * @param e recognition event; its JSON result property is parsed.
   */
  const onRecognized = (e: any) => {
    const result = JSON.parse(
      e.result.properties.getProperty(
        sdk.PropertyId.SpeechServiceResponse_JsonResult
      )
    );

    // A result may carry no NBest entry; indexing it blindly would throw
    // inside the recognizer callback.
    const nb = result?.NBest?.[0];
    if (!nb) return;

    const recognizedWords: any[] = Array.isArray(nb.Words) ? nb.Words : [];

    const cleanLessonPhrase = cleanString(lessonPhrase);
    // Computed once; it is the same for every item mapped below.
    const lessonPhraseWords = cleanLessonPhrase
      .split(" ")
      .map((word: string) => word.toLowerCase());

    setAnalyzedWordsWithErrors((prev: any) => {
      return {
        words: [...prev.words, ...recognizedWords],
        scores: [...prev.scores, nb.PronunciationAssessment],
      };
    });

    setCurrentSpeechResult(
      speechAnalysisRef.current?.currentSpeechResult?.map((item: any) => {
        const wordIndexes = getAllIndexesOfWord({
          lessonPhraseWords,
          word: item.Word,
        });

        const lastOccurrenceIndex =
          wordIndexes.length > 1
            ? getLastOccurrence({
                phrase: cleanLessonPhrase,
                // NOTE(review): `indexes` belong to item.Word, but the word
                // passed here is the latest pre-recognition word. Kept as is;
                // confirm whether item.Word was intended.
                word: allPreRecognitionWordsRef.current[
                  allPreRecognitionWordsRef.current.length - 1
                ],
                remainingWords: remainingWordsRef.current,
                indexes: wordIndexes,
              })
            : wordIndexes[0];

        return {
          ...item,
          index: lastOccurrenceIndex,
        };
      }) || []
    );

    const spokenWords = recognizedWords.filter((word: any) => {
      return word.PronunciationAssessment?.ErrorType !== "Omission";
    });

    setAllAnalyzedWords((prev: any) => {
      return [...prev, ...spokenWords];
    });
  };

  /**
   * Starts a fresh recognition session for `currentPhrase`.
   *
   * Closes the previous SpeechAnalysis (if any), clears the current result and
   * audio URL, creates a new SpeechAnalysis wired to the recognition handlers
   * and, when a speech token is available, starts continuous recognition and
   * flags the provider as recording.
   *
   * Failures are logged rather than thrown.
   */
  const handleStartRecording = async () => {
    if (speechAnalysisRef.current) {
      setTempFlag(false);
      speechAnalysisRef.current.close();
    }

    // Reset states
    setCurrentSpeechResult([]);
    setAudioUrl(null);

    try {
      // Constructed inside the try block so a failing constructor is logged
      // instead of surfacing as an unhandled promise rejection.
      const analysis = new SpeechAnalysis({
        referenceText: currentPhrase,
        speechToken,
        onRecognizing,
        onRecognized,
        // onSessionStopped: () => {},
      });
      speechAnalysisRef.current = analysis;

      if (!speechToken) {
        return;
      }

      // Assumes `recognizer` is a Speech SDK recognizer, whose start call takes
      // (successCallback, errorCallback) — TODO confirm against SpeechAnalysis.
      // Start failures arrive through the error callback, not as an exception,
      // so the surrounding try/catch alone cannot observe them.
      analysis.recognizer?.startContinuousRecognitionAsync(
        undefined,
        (err: string) => {
          console.error("Error starting speech recognition:", err);
          // Undo the optimistic flags only if this session is still the
          // active one.
          if (speechAnalysisRef.current === analysis) {
            setRecording(false);
            setTempFlag(false);
          }
        }
      );

      setRecording(true);
      setTempFlag(true);
    } catch (err) {
      console.error("Error accessing audio devices:", err);
    }
  };

  /**
   * Closes the active SpeechAnalysis and clears the recording flag.
   * Does nothing when no analysis instance exists.
   */
  const handleStopRecording = () => {
    const activeAnalysis = speechAnalysisRef.current;
    if (!activeAnalysis) return;

    activeAnalysis.close();
    setRecording(false);
  };

  /**
   * Appends a score object to the accumulated `scores` list.
   *
   * Nothing is appended when `newScores` is falsy or when its `accuracyScore`
   * is exactly 0.
   *
   * `scores` is always an array here, so the former "all score fields are 0"
   * check on it could never be true and has been dropped; the resulting list is
   * the same. The append uses a functional update so it never works from a
   * stale copy of `scores` captured by an older render.
   *
   * @param newScores score object to append (may be undefined).
   */
  function setScoresIncremental(newScores: any) {
    if (!newScores || newScores.accuracyScore === 0) return;

    setScores((previousScores: any) => [...previousScores, newScores]);
  }

  /**
   * Clears the word-tracking lists and puts the next-word index back to -1.
   */
  function resetAllStates() {
    // Each list receives its own fresh empty array, in this order.
    const listSetters = [
      setWordsSpokenInCorrectOrder,
      setCurrentSpeechResult,
      setAllAnalyzedWords,
      setNotAnalyzedWords,
      setAllPreRecognitionWords,
    ];
    for (const clearList of listSetters) {
      clearList([]);
    }

    setNextWordIndex(-1);
  }

  // Publish the speech state, refs and actions to the subtree.
  // NOTE(review): the value object is created inline, so it is a new object on
  // every render of the provider.
  return (
    <SpeechContext.Provider
      value={{
        setCurrentPhrase,
        currentSpeechResult,
        setCurrentSpeechResult,
        setScoresIncremental,
        speechErrors,
        allAnalyzedWords,
        setAllAnalyzedWords,
        remainingWordsRef,
        analyzedWordsWithErrors,
        scores,
        currentPhrase,
        audioUrl,
        recording,
        handleStartRecording,
        handleStopRecording,
        speechAnalysisRef,
        nextWord,
        nextWordIndex,
        setRecognitionStarted,
        recognitionStarted,
        hasInterventions,
        setHasInterventions,
        notAnalyzedWords,
        allPreRecognitionWords,
        setAllPreRecognitionWords,
        tempFlag,
        setTempFlag,
        setNextWord,
        setNextWordIndex,
        resetAllStates,
        timer,
        setIsRecognizingLetters,
      }}
    >
      <>{children}</>
    </SpeechContext.Provider>
  );
};

/**
 * Hook returning the value published by the nearest SpeechProvider.
 *
 * @throws Error when the context value is undefined, i.e. the hook is used
 * outside a SpeechProvider.
 */
export const useSpeech = () => {
  const speech = useContext(SpeechContext);

  if (speech !== undefined) {
    return speech;
  }

  throw new Error("useSpeech must be used within a SpeechProvider");
};
