# Install dependencies (pick faiss-cpu OR faiss-gpu, not both; they conflict).
#!pip install -qU langchain-community faiss-cpu langchain-openai gradio

import os
from uuid import uuid4

import faiss
import pandas as pd
import gradio as gr
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAI, OpenAIEmbeddings
from langchain_core.documents import Document
from langchain_core.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
# Load the cleaned news dataset, skipping malformed rows.
df = pd.read_csv('news_paper-Cleaned.csv', encoding='utf-8', on_bad_lines='skip')

# Never hard-code a real API key; set OPENAI_API_KEY in the environment
# (e.g. as a Space secret) before running.
os.environ["OPENAI_API_KEY"] = "YOUR_OPENAI_API_KEY"

embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

# The FAISS index dimension must match the embedding size, probed here
# with a throwaway query.
index = faiss.IndexFlatL2(len(embeddings.embed_query("hello world")))
vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)
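# Optional sanity check (my addition, not part of the original app): the index
# dimensionality must equal the embedding size, or add_documents() fails later.
# assert index.d == len(embeddings.embed_query("hello world"))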
# Collect the fields we care about from each article, then wrap each record
# in a Document (the whole dict is stringified as the page content).
documents = [
    {
        'title': row['title'],
        'author': row['author'],
        'description': row['description'],
        'full_text': row['full_text'],
    }
    for _, row in df.iterrows()
]
full_text = [
    Document(page_content=str(doc), metadata={"source": "news"})
    for doc in documents
]
# Split each article into ~1000-character chunks with 100 characters of
# overlap so context is not lost at chunk boundaries.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
    length_function=len,
    is_separator_regex=False,
)
text_split = text_splitter.split_documents(full_text)

# Index every chunk under a fresh UUID.
uuids = [str(uuid4()) for _ in range(len(text_split))]
vector_store.add_documents(documents=text_split, ids=uuids)
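# Quick retrieval smoke test, a sketch I've added (the query string is a
# made-up example, not from the original app); uncomment to verify the index:
# for doc in vector_store.similarity_search("election results", k=3):
#     print(doc.page_content[:120])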
# MMR (maximal marginal relevance) trades pure similarity for diversity
# across the 10 returned chunks.
retriever = vector_store.as_retriever(search_type="mmr", search_kwargs={"k": 10})
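# The retriever can also be exercised on its own; `invoke` is the Runnable
# entry point in recent LangChain releases (illustrative query only):
# docs = retriever.invoke("latest sports headlines")
# print(len(docs))  # up to 10 diverse chunks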
def questions(query):
    template = """
    You are a helpful assistant that can answer questions about specific data.
    Answer only from the given context.
    You will receive 10 context passages; return all relevant answers, separated by new lines.
    Question: {question}
    Context: {context}
    Answer:
    """
    PROMPT = PromptTemplate(template=template, input_variables=['question', 'context'])
    qa_chain = RetrievalQA.from_chain_type(
        llm=OpenAI(),
        retriever=retriever,
        chain_type_kwargs={"prompt": PROMPT},
    )
    return qa_chain.invoke({"query": query})['result']
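# Example direct call, bypassing the Gradio UI (the question is illustrative):
# print(questions("Who wrote the article about the election?"))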
# Minimal Gradio UI: a text box in, the model's answer out.
demo = gr.Interface(fn=questions, inputs="text", outputs="text")
demo.launch()