Spaces: Update app.py

app.py CHANGED
@@ -11,7 +11,7 @@ import docx2txt
 from PIL import Image
 from io import BytesIO
 from tqdm import tqdm
-from transformers import …
+from transformers import AutoProcessor, AutoModelForVision2Seq
 from sentence_transformers import SentenceTransformer, util
 from nltk.tokenize import sent_tokenize

@@ -116,7 +116,7 @@ def embed_all():
         ids.append(chunk_id)
         metas.append({"source": fname, "page": page})

-        if len(docs) >= …
+        if len(docs) >= 32:  # Increased batch size for efficiency
             embs = embedder.encode(docs).tolist()
             collection.add(documents=docs, ids=ids, metadatas=metas, embeddings=embs)
             docs, ids, metas = [], [], []
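Note on the new 32-chunk batching: `collection.add` only fires once 32 chunks have accumulated, so a trailing partial batch still has to be flushed after the loop ends. A minimal sketch of that flush, assuming the same `docs`/`ids`/`metas` accumulators and `collection` handle as above (whether app.py already does this is outside the visible hunk):

# Flush any trailing batch of fewer than 32 chunks once the loop is done.
if docs:
    embs = embedder.encode(docs).tolist()
    collection.add(documents=docs, ids=ids, metadatas=metas, embeddings=embs)
    docs, ids, metas = [], [], []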
@@ -135,20 +135,14 @@ def embed_all():
 # ---------------- Model Setup ----------------
 def load_model():
     try:
-        …
-        model = …
-            …
-            token=HF_TOKEN,
-            device_map="auto" if torch.cuda.is_available() else None,
-            torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32
-        ).to(device)
-        pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0 if torch.cuda.is_available() else -1)
-        return pipe, tokenizer
+        processor = AutoProcessor.from_pretrained(MODEL_ID, token=HF_TOKEN)
+        model = AutoModelForVision2Seq.from_pretrained(MODEL_ID, token=HF_TOKEN).to(device)
+        return model, processor
     except Exception as e:
         print("Model loading failed:", e)
         return None, None

-def ask_model(question, context, pipe, tokenizer):
+def ask_model(question, context, model, processor):
     prompt = f"""Use only the following context to answer. If uncertain, say \"I don't know.\"

 <context>
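The rewritten loader drops the old pipeline's dtype handling. If that behavior is still wanted for the vision-to-sequence model, a sketch under the same `MODEL_ID`/`HF_TOKEN`/`device` globals might look like this; it is an assumption about intent, not the Space's actual code:

import torch
from transformers import AutoProcessor, AutoModelForVision2Seq

def load_model():
    # Same stack as the new code, but keeping the removed bfloat16/float32
    # choice so GPU runs use the cheaper dtype.
    processor = AutoProcessor.from_pretrained(MODEL_ID, token=HF_TOKEN)
    model = AutoModelForVision2Seq.from_pretrained(
        MODEL_ID,
        token=HF_TOKEN,
        torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
    ).to(device)
    return model, processor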
@@ -157,18 +151,19 @@ def ask_model(question, context, pipe, tokenizer):

 Q: {question}
 A:"""
-    …
-    …
+    inputs = processor(prompt, return_tensors="pt").to(device)
+    output = model.generate(**inputs)
+    return processor.decode(output[0], skip_special_tokens=True)

 # ---------------- Query ----------------
 def get_answer(question):
-    if not embedder or not db or not …
+    if not embedder or not db or not model:
         return "System not ready. Try again after initialization."
     try:
         query_emb = embedder.encode(question, convert_to_tensor=True)
         results = db.query(query_texts=[question], n_results=MAX_CONTEXT_CHUNKS)
         context = "\n\n".join(results["documents"][0])
-        return ask_model(question, context, …
+        return ask_model(question, context, model, processor)
     except Exception as e:
         print("Query error:", e)
         return f"Error: {e}"
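Two details of the new query path worth flagging: `query_emb` is computed but never passed to `db.query` (which re-embeds the question via `query_texts`), and `model.generate(**inputs)` runs with the checkpoint's default generation config, which often caps output at roughly 20 new tokens. A sketch of both adjustments, assuming Chroma's `query_embeddings` parameter; these are suggestions, not changes present in the diff:

# Reuse the already-computed embedding instead of re-embedding the question.
results = db.query(query_embeddings=[query_emb.tolist()], n_results=MAX_CONTEXT_CHUNKS)

# Give generation an explicit budget so answers aren't truncated early.
output = model.generate(**inputs, max_new_tokens=256)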
@@ -184,8 +179,8 @@ with gr.Blocks() as demo:

 # Startup Initialization
 embedder = None
-…
-…
+model = None
+processor = None

 try:
     db, embedder = embed_all()
@@ -193,10 +188,10 @@ except Exception as e:
     print("❌ Embedding failed:", e)

 try:
-    …
+    model, processor = load_model()
 except Exception as e:
     print("❌ Model load failed:", e)

 # Launch
 if __name__ == "__main__":
-    demo.launch(share=…
+    demo.launch(share=False)
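For context, the Blocks UI itself is outside these hunks. A minimal, purely illustrative wiring of `get_answer` into the `gr.Blocks() as demo` container referenced above (component names here are hypothetical, not the Space's real layout):

import gradio as gr

with gr.Blocks() as demo:
    question = gr.Textbox(label="Question")  # hypothetical component names
    answer = gr.Textbox(label="Answer")
    gr.Button("Ask").click(fn=get_answer, inputs=question, outputs=answer)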