import os
import glob

from langchain.document_loaders import UnstructuredMarkdownLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document

from transformers import pipeline 
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

from peft import PeftModel, PeftConfig

import streamlit as st


def main():
    # set page title
    st.set_page_config(page_title="Project Nexodus Documentation Retreival", page_icon="📖", layout="wide")
    st.header("📖 Ask Project Nexodus Docs")
    # set description
    st.markdown("""
                Generates answers to your questions about Project Nexodus by leveraging foundation models to perform search and retreival of Nexodus documentation.\n
                
                Feeling stuck? Here are some examples of questions you can ask:
                * How do I run the control plane for Nexodus?
                * How do I monitor the Nexodus stack locally?
                * How can I contribute to Project Nexodus?
                """)
    # set sidebar
    with st.sidebar:
        # create instructions for use 
        st.markdown("""
                    # How to use:\n
                    1. Enter your HuggingFace API token below
                    2. Select your answer generation strategy from the dropdown menu
                    3. Ask a question about Linux networking
                    4. Click on the `Submit` button or optionally, click on the `Feeling Lucky`
                    """)
         # create input box for HF API token                                                                                                  
        API_KEY = st.text_input('Hugging Face API Token 🤗', type='password', 
                                placeholder='Paste your HuggingFace token here (sk-...)', 
                                help="You can get your API token from https://huggingface.co/docs/hub/security-tokens.")
        
        st.markdown("""
                    # About 
                    Talk to Project Nexodus is a web application that answers your questions about Nexodus,
                    with the goal of exploring the capabilities and limitations of Large Language Models (LLMs) for question and 
                    answering tasks. It demonstrates the following strategies for question answering: extractive, abstractive, and 
                    generative. 
                    
                     This project is still in beta and mainly used for research purposes. It is highly unadvised for users to rely on it for Project Nexodus troubleshooting. 
                     Please refer to the [official Nexodus documentation](https://github.com/nexodus-io/nexodus) for help. Proceed at your own risk 💀
                    """)
    if API_KEY:
        strategy = st.selectbox('Q&A Strategy', ['Extractive', 'Abstractive', 'Finetuned with LoRA'])
        question = st.text_input("Enter your question here:")
        col1, col2 = st.columns([1,1])
        with col1:
            generate_answer = st.button("Generate Answer")
        with col2:
            feeling_lucky = st.button("Feeling Lucky")

        if question != "": 
            if strategy and generate_answer:
                answer = get_answer(question, strategy)
                st.write(answer)
            elif feeling_lucky:
                answer = get_answer(question, 'Generative')
                st.write(answer)
        
def load_db():
    # initalize embedder 
    print('Loading FAISS index...')
    embeddings = HuggingFaceEmbeddings()
    # load FAISS vector database storing Nexodus documentation
    db = FAISS.load_local("nexodus_index.faiss", embeddings)
    return db

def load_model():
    llm = "deepset/roberta-base-squad2"
    return llm

def load_model_tokenizer(strategy):
    model_name = "google/flan-t5-base"
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name,load_in_8bit=True, device_map='auto')
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    
    if strategy == 'Finetuned with LoRA':
        print(f"Loading finetuned {model_name} with LoRA...") 
        model = PeftModel.from_pretrained(model, 'exyou/nexodus-flan-T5')
        print("Model sucessfully loaded")
        
    return model, tokenizer

def provide_context(context):
    context = [f"<P> {m.page_content}" for m in context]
    context = " ".join(context)
    return context

def get_answer(question, strategy):
    if strategy == 'Generative':
        model, tokenizer = load_model_tokenizer(strategy)
        context = ""
        question_context = f"Question: ## {question} ##\n Context: ## {context} ##"
        input_ids = tokenizer(question_context, return_tensors="pt", truncation=True).input_ids.cuda()
        outputs = model.generate(input_ids=input_ids, max_new_tokens=1000, do_sample=True, top_p=1)
        answer = tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)[0]
        return answer
        
    else:
        db = load_db()
        if strategy == 'Finetuned with LoRA':
            model, tokenizer = load_model_tokenizer(strategy)
            # get the top 3 most similar sentences in the docs to the inputted question
            top_3 = db.similarity_search(question, k=3)
            # set as context for the question
            context = provide_context(top_3)
            question_context = f"Question: ## {question} ##\n Context: ## {context} ##"
            input_ids = tokenizer(question_context, return_tensors="pt", truncation=True).input_ids.cuda()
            outputs = model.generate(input_ids=input_ids, max_new_tokens=1000, do_sample=True, top_p=1)
            answer = tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)[0]
            return answer
        
        else: 
            llm = load_model()
            if strategy == 'Extractive':
                output = db.similarity_search(question, k=1)
                answer = output[0].page_content
                return answer

            elif strategy == 'Abstractive':
                top_3 = db.similarity_search(question, k=3)
                context = provide_context(top_3)
                text2text_generator = pipeline(task='question-answering', tokenizer=llm, model=llm)
                output = text2text_generator(question=question, context=context, temperature=1.5, min_length=5, max_length=50)
                answer = output["answer"]
                return answer

    return answer

if __name__ == "__main__":
    main()