import os
import glob
from langchain.document_loaders import UnstructuredMarkdownLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document
from transformers import pipeline
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from peft import PeftModel, PeftConfig
import streamlit as st
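# Streamlit app for retrieval-augmented question answering over the Project Nexodus docs:
# relevant passages are fetched from a FAISS index and passed to Hugging Face models
# under one of several answering strategies.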
def main():
# set page title
    st.set_page_config(page_title="Project Nexodus Documentation Retrieval", page_icon="📖", layout="wide")
    st.header("📖 Ask Project Nexodus Docs")
# set description
st.markdown("""
Generates answers to your questions about Project Nexodus by leveraging foundation models to perform search and retrieval over the Nexodus documentation.\n
Feeling stuck? Here are some examples of questions you can ask:
* How do I run the control plane for Nexodus?
* How do I monitor the Nexodus stack locally?
* How can I contribute to Project Nexodus?
""")
# set sidebar
with st.sidebar:
# create instructions for use
st.markdown("""
# How to use:\n
1. Enter your HuggingFace API token below
2. Select your answer generation strategy from the dropdown menu
3. Ask a question about Project Nexodus
4. Click on the `Generate Answer` button or, optionally, on the `Feeling Lucky` button
""")
# create input box for HF API token
        API_KEY = st.text_input('Hugging Face API Token 🤗', type='password',
                                placeholder='Paste your Hugging Face token here (hf_...)',
                                help="You can get your API token from https://huggingface.co/docs/hub/security-tokens.")
st.markdown("""
# About
Talk to Project Nexodus is a web application that answers your questions about Nexodus,
with the goal of exploring the capabilities and limitations of Large Language Models (LLMs) for
question-answering tasks. It demonstrates the following strategies for question answering:
extractive, abstractive, and generative.
This project is still in beta and is intended mainly for research purposes. Users are strongly advised not to rely on it for Project Nexodus troubleshooting.
Please refer to the [official Nexodus documentation](https://github.com/nexodus-io/nexodus) for help. Proceed at your own risk 💀
""")
if API_KEY:
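        # only expose the Q&A controls once an API token has been entered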
strategy = st.selectbox('Q&A Strategy', ['Extractive', 'Abstractive', 'Finetuned with LoRA'])
question = st.text_input("Enter your question here:")
col1, col2 = st.columns([1,1])
with col1:
generate_answer = st.button("Generate Answer")
with col2:
feeling_lucky = st.button("Feeling Lucky")
if question != "":
if strategy and generate_answer:
answer = get_answer(question, strategy)
st.write(answer)
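            # 'Feeling Lucky' bypasses the selected strategy and answers generatively, without retrieved context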
elif feeling_lucky:
answer = get_answer(question, 'Generative')
st.write(answer)
def load_db():
    # initialize embedder
print('Loading FAISS index...')
embeddings = HuggingFaceEmbeddings()
# load FAISS vector database storing Nexodus documentation
db = FAISS.load_local("nexodus_index.faiss", embeddings)
return db
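# For reference, "nexodus_index.faiss" is assumed to be built offline from the Nexodus
# markdown docs. A minimal sketch of one way to build it with the loader and splitter
# imported above (the docs path and chunking parameters here are assumptions, not the
# original build script):
def build_db(docs_dir="docs"):
    # load every markdown file under the docs directory
    documents = []
    for path in glob.glob(os.path.join(docs_dir, "**", "*.md"), recursive=True):
        documents.extend(UnstructuredMarkdownLoader(path).load())
    # split the docs into chunks small enough to embed and retrieve individually
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    chunks = splitter.split_documents(documents)
    # embed the chunks and persist the FAISS index to disk
    db = FAISS.from_documents(chunks, HuggingFaceEmbeddings())
    db.save_local("nexodus_index.faiss")
    return db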
def load_model():
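    # name of the extractive QA model backing the pipeline used by the Abstractive strategy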
llm = "deepset/roberta-base-squad2"
return llm
def load_model_tokenizer(strategy):
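    # load flan-t5-base in 8-bit; for the LoRA strategy, the finetuned adapter is applied on top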
model_name = "google/flan-t5-base"
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name, load_in_8bit=True, device_map='auto')
tokenizer = AutoTokenizer.from_pretrained(model_name)
if strategy == 'Finetuned with LoRA':
print(f"Loading finetuned {model_name} with LoRA...")
model = PeftModel.from_pretrained(model, 'exyou/nexodus-flan-T5')
print("Model sucessfully loaded")
return model, tokenizer
def provide_context(context):
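    # join the retrieved documents into a single context string, prefixing each passage with <P>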
context = [f"<P> {m.page_content}" for m in context]
context = " ".join(context)
return context
def get_answer(question, strategy):
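    # dispatch on strategy: generative answers from the model alone; the others retrieve context from the FAISS index first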
if strategy == 'Generative':
model, tokenizer = load_model_tokenizer(strategy)
context = ""
question_context = f"Question: ## {question} ##\n Context: ## {context} ##"
        input_ids = tokenizer(question_context, return_tensors="pt", truncation=True).input_ids.to(model.device)
outputs = model.generate(input_ids=input_ids, max_new_tokens=1000, do_sample=True, top_p=1)
answer = tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)[0]
return answer
else:
db = load_db()
if strategy == 'Finetuned with LoRA':
model, tokenizer = load_model_tokenizer(strategy)
# get the top 3 most similar sentences in the docs to the inputted question
top_3 = db.similarity_search(question, k=3)
# set as context for the question
context = provide_context(top_3)
question_context = f"Question: ## {question} ##\n Context: ## {context} ##"
            input_ids = tokenizer(question_context, return_tensors="pt", truncation=True).input_ids.to(model.device)
outputs = model.generate(input_ids=input_ids, max_new_tokens=1000, do_sample=True, top_p=1)
answer = tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)[0]
return answer
else:
llm = load_model()
if strategy == 'Extractive':
output = db.similarity_search(question, k=1)
answer = output[0].page_content
return answer
elif strategy == 'Abstractive':
top_3 = db.similarity_search(question, k=3)
context = provide_context(top_3)
                qa_pipeline = pipeline(task='question-answering', tokenizer=llm, model=llm)
                # question-answering pipelines accept max_answer_len rather than generation kwargs
                output = qa_pipeline(question=question, context=context, max_answer_len=50)
                answer = output["answer"]
                return answer
if __name__ == "__main__":
main()