import streamlit as st
import pandas as pd
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
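
# Assumed dependencies (requirements.txt): streamlit, pandas, llama-cpp-python,
# huggingface_hub, plus openpyxl, which pandas needs to read .xlsx files.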

# Page configuration
st.set_page_config(
    page_title="📊 LLM Data Analyzer",
    page_icon="📊",
    layout="wide",
    initial_sidebar_state="expanded"
)
# Custom CSS for better UI
st.markdown("""
<style>
.main {
    padding: 0rem 1rem;
}
.stTabs [data-baseweb="tab-list"] {
    gap: 2px;
}
</style>
""", unsafe_allow_html=True)
# Title and description
st.title("📊 LLM Data Analyzer")
st.markdown("""
Analyze your CSV/Excel files and chat with an AI assistant powered by Llama 2.
This app runs on the **free Hugging Face CPU tier** at ~5-10 tokens per second,
so a full answer can take 30-60 seconds per query.
""")
# Cache the model so Streamlit reruns don't reload it on every interaction
@st.cache_resource
def load_llm_model():
    """Load the quantized Llama 2 model from the Hugging Face Hub."""
    st.info("📥 Downloading model (first time only, ~4GB)... This may take 2-3 minutes.")
    try:
        model_path = hf_hub_download(
            repo_id="TheBloke/Llama-2-7B-Chat-GGUF",
            filename="llama-2-7b-chat.Q4_K_M.gguf"
        )
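        # hf_hub_download caches the file locally (by default under
        # ~/.cache/huggingface/hub), so subsequent runs reuse the cached copy.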
        llm = Llama(
            model_path=model_path,
            n_ctx=2048,      # context window size in tokens
            n_threads=4,     # CPU threads used for inference
            n_gpu_layers=0,  # CPU only (free tier)
            verbose=False
        )
        return llm
    except Exception as e:
        st.error(f"Error loading model: {e}")
        return None

# Load model
llm = load_llm_model()
if llm is None:
    st.error("Failed to load model. Please refresh the page.")
    st.stop()
st.success("✅ Model loaded successfully!")

# Create tabs
tab1, tab2, tab3 = st.tabs(["📤 Upload & Analyze", "💬 Chat", "📖 About"])

# ============================================================================
# TAB 1: Upload & Analyze
# ============================================================================
with tab1:
    st.header("📤 Upload and Analyze Data")
    uploaded_file = st.file_uploader(
        "Upload a CSV or Excel file",
        type=["csv", "xlsx", "xls"],
        help="Supported formats: CSV, Excel"
    )
    if uploaded_file is not None:
        st.success(f"✅ File uploaded: {uploaded_file.name}")
        # Read the file
        try:
            if uploaded_file.name.endswith('.csv'):
                df = pd.read_csv(uploaded_file)
            else:
                df = pd.read_excel(uploaded_file)

            # Display data preview
            st.subheader("📋 Data Preview")
            st.dataframe(df.head(10), use_container_width=True)

            # Display statistics
            st.subheader("📈 Data Statistics")
            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric("Rows", df.shape[0])
            with col2:
                st.metric("Columns", df.shape[1])
            with col3:
                st.metric("Memory", f"{df.memory_usage(deep=True).sum() / 1024:.2f} KB")

            # Detailed statistics
            st.write(df.describe().T)

            # Ask AI about the data
            st.subheader("❓ Ask AI About Your Data")
            question = st.text_input(
                "What would you like to know about this data?",
                placeholder="e.g., What is the average value in column X?"
            )
            if question:
                with st.spinner("🤖 AI is analyzing your data..."):
                    # Create prompt from the numeric summary and column names
                    data_summary = df.describe().to_string()
                    prompt = f"""You are a data analyst expert. You have the following data summary:
{data_summary}
Column names: {', '.join(df.columns.tolist())}
User's question: {question}
Please provide a clear, concise analysis based on the data summary. Focus on actionable insights."""
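                    # Note (assumption): Llama 2 chat GGUF models were fine-tuned on
                    # the [INST] ... [/INST] template; the plain prompt above usually
                    # works, but wrapping it, e.g. f"<s>[INST] {prompt} [/INST]",
                    # often yields better-formatted answers.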
                    # Generate response
                    response = llm(
                        prompt,
                        max_tokens=300,
                        stop=["\n\nUser:", "Question:"],
                        echo=False,
                        temperature=0.7
                    )
                    answer = response['choices'][0]['text'].strip()
                    st.success("✅ Analysis Complete")
                    st.write(answer)
        except Exception as e:
            st.error(f"Error reading file: {e}")

# ============================================================================
# TAB 2: Chat
# ============================================================================
with tab2:
    st.header("💬 Chat with AI Assistant")
    st.write("Have a conversation with Llama 2. Ask anything!")

    # Initialize session state for chat history
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Display chat history
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # Chat input
    user_input = st.chat_input("Type your message here...")
    if user_input:
        # Add user message to history
        st.session_state.messages.append({"role": "user", "content": user_input})

        # Display user message
        with st.chat_message("user"):
            st.markdown(user_input)

        # Generate AI response
        with st.chat_message("assistant"):
            with st.spinner("⏳ Generating response..."):
                prompt = f"""You are a helpful AI assistant. The user asks: {user_input}
Provide a clear, helpful, and concise response."""
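                # Note: only the latest user message is sent to the model; earlier
                # turns in st.session_state.messages are rendered above but not
                # included in the prompt, so the assistant has no cross-turn memory.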
                response = llm(
                    prompt,
                    max_tokens=300,
                    stop=["\n\nUser:", "User:"],
                    echo=False,
                    temperature=0.7
                )
                assistant_message = response['choices'][0]['text'].strip()
                st.markdown(assistant_message)

        # Add assistant message to history
        st.session_state.messages.append({
            "role": "assistant",
            "content": assistant_message
        })

# ============================================================================
# TAB 3: About
# ============================================================================
with tab3:
    st.header("ℹ️ About This App")
    st.markdown("""
### 🎯 What is this?
**LLM Data Analyzer** is an AI-powered tool for analyzing data and having conversations with an intelligent assistant.

### 🔧 Technology Stack
- **Model:** Llama 2 7B Chat (4-bit quantized GGUF)
- **Framework:** llama.cpp (CPU inference)
- **Frontend:** Streamlit
- **Hosting:** Hugging Face Spaces (Free Tier)

### ⚡ Performance
- **Speed:** ~5-10 tokens per second (free CPU)
- **Context:** 2048 tokens max
- **Model Size:** ~4GB (quantized)
- **Hardware:** Free tier CPU

### 💡 Use Cases
1. **Data Analysis**: Upload CSV/Excel and ask questions
2. **Chat**: General conversation with AI
3. **Learning**: Understand your data better

### 🚀 Faster Version Available
For **GPU acceleration** (70+ tokens/sec):
- Run locally on an Apple Silicon Mac using MLX
- Upgrade to the Hugging Face PRO tier
- Deploy on GPU-enabled cloud servers

### 📝 Tips
- Keep questions focused and specific for best results
- The first request takes longer (model download and loading)
- Uploaded data is processed in memory and is not persisted on the server

### 🔗 Links
- [GitHub Repository](#) - Source code
- [Hugging Face Hub](#) - Model info
- [llama.cpp](#) - Inference engine

---
**Version:** 1.0 | **Last Updated:** Dec 2025
""")