import os
import glob

from langchain.document_loaders import UnstructuredMarkdownLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
from peft import PeftModel, PeftConfig
import streamlit as st

def main():
    # set page title
    st.set_page_config(page_title="Project Nexodus Documentation Retrieval", page_icon="🤗", layout="wide")
    st.header("🤗 Ask Project Nexodus Docs")
    # set description
    st.markdown("""
    Generates answers to your questions about Project Nexodus by leveraging foundation models to perform search and retrieval over the Nexodus documentation.\n
    Feeling stuck? Here are some examples of questions you can ask:
    * How do I run the control plane for Nexodus?
    * How do I monitor the Nexodus stack locally?
    * How can I contribute to Project Nexodus?
    """)
    # set sidebar
    with st.sidebar:
        # create instructions for use
        st.markdown("""
        # How to use:\n
        1. Enter your Hugging Face API token below
        2. Select your answer generation strategy from the dropdown menu
        3. Ask a question about Project Nexodus
        4. Click on the `Generate Answer` button, or try the `Feeling Lucky` button
        """)
        # create input box for HF API token
        API_KEY = st.text_input('Hugging Face API Token 🤗', type='password',
                                placeholder='Paste your Hugging Face token here (hf_...)',
                                help="You can get your API token from https://huggingface.co/docs/hub/security-tokens.")
        st.markdown("""
        # About
        Talk to Project Nexodus is a web application that answers your questions about Nexodus,
        with the goal of exploring the capabilities and limitations of Large Language Models (LLMs) for
        question-answering tasks. It demonstrates three strategies for question answering: extractive,
        abstractive, and generative.

        This project is still in beta and is mainly used for research purposes; users are strongly advised
        not to rely on it for Project Nexodus troubleshooting.
        Please refer to the [official Nexodus documentation](https://github.com/nexodus-io/nexodus) for help. Proceed at your own risk!
        """)
    if API_KEY:
        strategy = st.selectbox('Q&A Strategy', ['Extractive', 'Abstractive', 'Finetuned with LoRA'])
        question = st.text_input("Enter your question here:")
        col1, col2 = st.columns([1, 1])
        with col1:
            generate_answer = st.button("Generate Answer")
        with col2:
            feeling_lucky = st.button("Feeling Lucky")
        if question != "":
            if strategy and generate_answer:
                answer = get_answer(question, strategy)
                st.write(answer)
            elif feeling_lucky:
                # "Feeling Lucky" skips retrieval and answers with the generative strategy
                answer = get_answer(question, 'Generative')
                st.write(answer)

def load_db():
    # initialize embedder
    print('Loading FAISS index...')
    embeddings = HuggingFaceEmbeddings()
    # load the FAISS vector database storing the Nexodus documentation embeddings
    db = FAISS.load_local("nexodus_index.faiss", embeddings)
    return db
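
# The FAISS index loaded above has to be built ahead of time. Below is a minimal
# sketch of how it could be constructed from a local checkout of the Nexodus docs,
# using the markdown loader and text splitter imported at the top of this file.
# The build_db name, the "docs" directory, and the chunking parameters are
# illustrative assumptions, not part of the deployed app.
def build_db(docs_dir="docs"):
    # collect every markdown file under the (assumed) docs directory
    md_paths = glob.glob(os.path.join(docs_dir, "**", "*.md"), recursive=True)
    documents = []
    for path in md_paths:
        # parse each markdown file into langchain Document objects
        documents.extend(UnstructuredMarkdownLoader(path).load())
    # split the documents into overlapping chunks small enough to embed and retrieve
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    chunks = splitter.split_documents(documents)
    # embed the chunks and persist the index under the name load_db() expects
    db = FAISS.from_documents(chunks, HuggingFaceEmbeddings())
    db.save_local("nexodus_index.faiss")
    return db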

def load_model():
    # name of the extractive QA checkpoint used by the question-answering pipeline
    llm = "deepset/roberta-base-squad2"
    return llm

def load_model_tokenizer(strategy):
    model_name = "google/flan-t5-base"
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name, load_in_8bit=True, device_map='auto')
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    if strategy == 'Finetuned with LoRA':
        print(f"Loading finetuned {model_name} with LoRA...")
        # wrap the base model with the LoRA adapter weights
        model = PeftModel.from_pretrained(model, 'exyou/nexodus-flan-T5')
    print("Model successfully loaded")
    return model, tokenizer

def provide_context(context):
    # format each retrieved chunk as a paragraph and join into a single context string
    context = [f"<P> {m.page_content}" for m in context]
    context = " ".join(context)
    return context
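
# For example, two retrieved chunks with page_content "foo" and "bar" come back as
# the single string "<P> foo <P> bar", which is then interpolated into the prompt
# template used in get_answer() below.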

def get_answer(question, strategy):
    if strategy == 'Generative':
        # answer purely from the model's parametric knowledge, with no retrieved context
        model, tokenizer = load_model_tokenizer(strategy)
        context = ""
        question_context = f"Question: ## {question} ##\n Context: ## {context} ##"
        input_ids = tokenizer(question_context, return_tensors="pt", truncation=True).input_ids.cuda()
        outputs = model.generate(input_ids=input_ids, max_new_tokens=1000, do_sample=True, top_p=1)
        answer = tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)[0]
        return answer
    else:
        db = load_db()
        if strategy == 'Finetuned with LoRA':
            model, tokenizer = load_model_tokenizer(strategy)
            # get the top 3 most similar chunks in the docs to the input question
            top_3 = db.similarity_search(question, k=3)
            # set as context for the question
            context = provide_context(top_3)
            question_context = f"Question: ## {question} ##\n Context: ## {context} ##"
            input_ids = tokenizer(question_context, return_tensors="pt", truncation=True).input_ids.cuda()
            outputs = model.generate(input_ids=input_ids, max_new_tokens=1000, do_sample=True, top_p=1)
            answer = tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)[0]
            return answer
        else:
            llm = load_model()
            if strategy == 'Extractive':
                # return the single most similar documentation chunk verbatim
                output = db.similarity_search(question, k=1)
                answer = output[0].page_content
                return answer
            elif strategy == 'Abstractive':
                top_3 = db.similarity_search(question, k=3)
                context = provide_context(top_3)
                # extract a short answer span from the retrieved context
                qa_pipeline = pipeline(task='question-answering', tokenizer=llm, model=llm)
                output = qa_pipeline(question=question, context=context, max_answer_len=50)
                answer = output["answer"]
                return answer

if __name__ == "__main__":
    main()
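
# To run this app locally (assuming this file is saved as app.py and that streamlit,
# langchain, faiss-cpu, sentence-transformers, transformers, accelerate, bitsandbytes,
# and peft are installed), launch it with:
#   streamlit run app.py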