Spaces:

samaritan-ai
/

marianmt-he2arc-sam

Sleeping

App Files Files Community

marianmt-he2arc-sam / app.py

johnlockejrr

Upload app.py

9023eee verified 5 months ago

raw

history blame contribute delete

11.1 kB

	import csv
	import io
	import json
	import time
	from typing import Optional

	import streamlit as st
	import torch
	from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

	# Page configuration: browser-tab title and icon, wide layout, and a sidebar
	# that starts out expanded. The settings are gathered in one mapping and
	# passed to Streamlit in a single call.
	_PAGE_SETTINGS = {
	    "page_title": "Samaritan Hebrew to Samaritan Targumic Aramaic Translation",
	    "page_icon": "📚",
	    "layout": "wide",
	    "initial_sidebar_state": "expanded",
	}
	st.set_page_config(**_PAGE_SETTINGS)

	# Custom CSS for modern styling.
	# Injects a single <style> element into the page. It defines the classes
	# referenced by the HTML snippets emitted in main() (.main-header,
	# .sub-header, .input-area, .output-area), restyles buttons through the
	# .stButton selector, and also declares .translation-box,
	# .direction-selector and .model-info, which are not referenced elsewhere
	# in the visible source. unsafe_allow_html=True is passed so that the
	# <style> tag is emitted as HTML rather than shown as text.
	st.markdown("""
	<style>
	.main-header {
	font-size: 3rem;
	font-weight: 700;
	background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
	-webkit-background-clip: text;
	-webkit-text-fill-color: transparent;
	text-align: center;
	margin-bottom: 2rem;
	}

	.sub-header {
	font-size: 1.2rem;
	color: #666;
	text-align: center;
	margin-bottom: 3rem;
	}

	.translation-box {
	background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
	padding: 2rem;
	border-radius: 15px;
	box-shadow: 0 8px 32px rgba(0,0,0,0.1);
	margin: 1rem 0;
	}

	.input-area {
	background: white;
	border-radius: 10px;
	padding: 1.5rem;
	box-shadow: 0 4px 16px rgba(0,0,0,0.05);
	}

	.output-area {
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	color: white;
	border-radius: 10px;
	padding: 1.5rem;
	box-shadow: 0 4px 16px rgba(0,0,0,0.1);
	}

	.direction-selector {
	background: white;
	border-radius: 10px;
	padding: 1rem;
	box-shadow: 0 4px 16px rgba(0,0,0,0.05);
	margin-bottom: 1rem;
	}

	.stButton > button {
	background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
	color: white;
	border: none;
	border-radius: 25px;
	padding: 0.75rem 2rem;
	font-weight: 600;
	transition: all 0.3s ease;
	}

	.stButton > button:hover {
	transform: translateY(-2px);
	box-shadow: 0 8px 25px rgba(102, 126, 234, 0.4);
	}

	.model-info {
	background: #f8f9fa;
	border-radius: 10px;
	padding: 1rem;
	margin: 1rem 0;
	border-left: 4px solid #667eea;
	}
	</style>
	""", unsafe_allow_html=True)

	@st.cache_resource(show_spinner="Loading translation model...")
	def _load_model_cached():
	    """Load the tokenizer and model once and keep them in Streamlit's resource cache.

	    Any exception is allowed to propagate instead of being converted into a
	    ``(None, None, None)`` return value: a returned value would be stored in
	    the cache, which would pin a transient failure (for example a network
	    hiccup during download) until the server process restarts.

	    Returns:
	        A ``(tokenizer, model, device)`` tuple where ``device`` is ``"cuda"``
	        when CUDA is available and ``"cpu"`` otherwise. The model has been
	        moved to that device and switched to eval mode.
	    """
	    model_name = "johnlockejrr/marianmt-he2arc-sam"

	    tokenizer = AutoTokenizer.from_pretrained(model_name)
	    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

	    # Move to GPU if available
	    device = "cuda" if torch.cuda.is_available() else "cpu"
	    model.to(device)
	    model.eval()

	    return tokenizer, model, device


	def load_model():
	    """Load the Hugging Face model and tokenizer with caching.

	    Successful loads are served from the resource cache on later reruns.
	    Failures are reported in the UI and are not cached, so a later rerun
	    attempts the load again.

	    Returns:
	        ``(tokenizer, model, device)`` on success, or ``(None, None, None)``
	        if loading raised an exception.
	    """
	    try:
	        return _load_model_cached()
	    except Exception as e:  # UI boundary: show the problem instead of a traceback
	        st.error(f"Error loading model: {str(e)}")
	        return None, None, None

	def translate_text(text: str, direction: str, tokenizer, model, device: str, max_length: int = 512) -> Optional[str]:
	    """Run one translation through the given tokenizer/model pair.

	    The input is prefixed with a language tag: ``>>heb<<`` when ``direction``
	    is exactly ``"Hebrew to Aramaic"``, ``>>arc<<`` for any other value.
	    ``max_length`` is used twice: as the truncation limit when tokenizing the
	    input and as the length limit handed to ``model.generate``.

	    Returns:
	        The decoded translation, or ``None`` when ``text`` is empty or
	        whitespace-only, or when any step raises (the error is then shown
	        with ``st.error``).
	    """
	    if text.strip() == "":
	        return None

	    try:
	        # NOTE(review): for Hebrew input the tag names the source language;
	        # Marian multilingual checkpoints usually expect the *target* language
	        # tag - confirm against how this model was trained.
	        language_tag = ">>heb<<" if direction == "Hebrew to Aramaic" else ">>arc<<"
	        prompt = f"{language_tag} {text}"

	        # Tokenize, then move the resulting tensors to the model's device.
	        encoded = tokenizer(
	            prompt,
	            return_tensors="pt",
	            max_length=max_length,
	            truncation=True,
	            padding=True,
	        )
	        encoded = encoded.to(device)

	        # Deterministic beam search (no sampling).
	        generation_options = {
	            "max_length": max_length,
	            "num_beams": 4,
	            "length_penalty": 0.6,
	            "early_stopping": True,
	            "do_sample": False,
	        }
	        with torch.no_grad():
	            generated = model.generate(**encoded, **generation_options)

	        # Only the first returned sequence is decoded.
	        return tokenizer.decode(generated[0], skip_special_tokens=True)

	    except Exception as e:
	        st.error(f"Translation error: {str(e)}")
	        return None

	# Session-state keys that let the "Clear All" button actually reset widgets.
	_INPUT_TEXT_KEY = "input_text"
	_UPLOADER_NONCE_KEY = "batch_upload_nonce"


	def _clear_all_inputs():
	    """Button callback: empty the text area and force a fresh file uploader."""
	    st.session_state[_INPUT_TEXT_KEY] = ""
	    # The uploader is keyed by this counter; bumping it makes the next run
	    # create a new, empty uploader widget.
	    st.session_state[_UPLOADER_NONCE_KEY] = st.session_state.get(_UPLOADER_NONCE_KEY, 0) + 1


	def _render_sidebar(tokenizer, model, device):
	    """Draw the settings sidebar and return the selected maximum length."""
	    with st.sidebar:
	        st.markdown("### ⚙️ Settings")

	        # Max length setting
	        max_length = st.slider(
	            "Maximum Output Length",
	            min_value=64,
	            max_value=512,
	            value=256,
	            step=32,
	            help="Maximum length of the generated translation",
	        )

	        # Model info
	        st.markdown("### 📊 Model Information")
	        st.markdown("Model: johnlockejrr/marianmt-he2arc-sam")
	        st.markdown(f"Device: {device.upper()}")
	        st.markdown(f"Tokenizer: {tokenizer.__class__.__name__}")
	        st.markdown(f"Model Type: {model.__class__.__name__}")
	        st.markdown("Direction: Samaritan Hebrew → Samaritan Aramaic")

	        # Clear button. A bare rerun would keep every widget's value, so the
	        # reset is done in a callback that runs before the script reruns.
	        st.button("🗑️ Clear All", on_click=_clear_all_inputs)

	    return max_length


	def _render_single_translation(tokenizer, model, device, max_length):
	    """Draw the two-column input/output area for translating one text."""
	    col1, col2 = st.columns([1, 1])

	    with col1:
	        st.markdown('<div class="input-area">', unsafe_allow_html=True)
	        st.markdown("### 📝 Input Text")

	        # Text input (keyed so that _clear_all_inputs can reset it)
	        input_text = st.text_area(
	            "Enter Samaritan Hebrew text to translate",
	            height=200,
	            placeholder="Enter your Samaritan Hebrew text here...",
	            help="Type or paste the Samaritan Hebrew text you want to translate to Samaritan Aramaic",
	            key=_INPUT_TEXT_KEY,
	        )

	        # Translate button
	        translate_button = st.button(
	            "🔄 Translate to Samaritan Aramaic",
	            type="primary",
	            use_container_width=True,
	        )
	        st.markdown('</div>', unsafe_allow_html=True)

	    with col2:
	        st.markdown('<div class="output-area">', unsafe_allow_html=True)
	        st.markdown("### 🎯 Samaritan Aramaic Translation")

	        if translate_button and input_text.strip():
	            with st.spinner("Translating to Samaritan Aramaic..."):
	                # Add a small delay for better UX
	                time.sleep(0.5)

	                translation = translate_text(
	                    input_text,
	                    "Hebrew to Aramaic",
	                    tokenizer,
	                    model,
	                    device,
	                    max_length,
	                )

	            if translation:
	                st.markdown("Samaritan Aramaic:")
	                # Display translation in a code block that can be easily copied
	                st.code(translation, language=None)
	            else:
	                st.error("Translation failed. Please try again.")
	        else:
	            st.markdown("Samaritan Aramaic translation will appear here")
	        st.markdown('</div>', unsafe_allow_html=True)


	def _decode_uploaded_lines(raw):
	    """Decode UTF-8 bytes and return the stripped, non-blank lines.

	    ``utf-8-sig`` drops a leading byte-order mark if one is present, so it
	    does not end up glued to the first line. ``splitlines`` handles ``\\n``,
	    ``\\r\\n`` and lone ``\\r`` line endings.
	    """
	    text = raw.decode("utf-8-sig")
	    stripped_lines = (line.strip() for line in text.splitlines())
	    return [line for line in stripped_lines if line]


	def _results_to_csv(results):
	    """Serialize result dicts (keys ``original``/``translation``) as CSV text.

	    Every field is quoted and embedded double quotes are escaped by the csv
	    module, so source text containing quotes or commas stays in one cell.
	    """
	    buffer = io.StringIO()
	    buffer.write("Samaritan Hebrew,Samaritan Aramaic\n")
	    writer = csv.writer(buffer, quoting=csv.QUOTE_ALL, lineterminator="\n")
	    writer.writerows((row["original"], row["translation"]) for row in results)
	    return buffer.getvalue()


	def _render_batch_translation(tokenizer, model, device, max_length):
	    """Draw the file-upload section and translate every uploaded line on request."""
	    st.markdown("### 📚 Batch Translation")
	    st.markdown("Upload a text file with multiple Samaritan Hebrew lines to translate them all to Samaritan Aramaic.")

	    upload_nonce = st.session_state.get(_UPLOADER_NONCE_KEY, 0)
	    uploaded_file = st.file_uploader(
	        "Choose a text file",
	        type=['txt'],
	        help="Upload a .txt file with one Samaritan Hebrew text per line",
	        key=f"batch_upload_{upload_nonce}",
	    )

	    if uploaded_file is None:
	        return

	    try:
	        # getvalue() returns the whole buffer regardless of the current
	        # stream position, unlike read().
	        lines = _decode_uploaded_lines(uploaded_file.getvalue())
	    except Exception as e:  # UI boundary: report unreadable uploads
	        st.error(f"Error reading file: {str(e)}")
	        return

	    if not lines:
	        st.warning("The uploaded file contains no text lines.")
	        return

	    st.success(f"📄 Loaded {len(lines)} lines from {uploaded_file.name}")

	    if not st.button("🔄 Translate All to Samaritan Aramaic", type="primary"):
	        return

	    st.markdown("### 📋 Batch Translation Results")

	    # Create a progress bar
	    progress_bar = st.progress(0)
	    status_text = st.empty()

	    total = len(lines)
	    results = []
	    for position, line in enumerate(lines, start=1):
	        status_text.text(f"Translating line {position}/{total}: {line[:50]}...")

	        translation = translate_text(
	            line,
	            "Hebrew to Aramaic",
	            tokenizer,
	            model,
	            device,
	            max_length,
	        )

	        results.append({
	            'original': line,
	            'translation': translation or "Translation failed",
	        })

	        # Update progress
	        progress_bar.progress(position / total)

	    status_text.text("✅ Translation complete!")

	    # Display results
	    for position, result in enumerate(results, start=1):
	        with st.expander(f"Line {position}: {result['original'][:50]}..."):
	            st.markdown(f"Samaritan Hebrew: {result['original']}")
	            st.markdown(f"Samaritan Aramaic: {result['translation']}")

	    # Download results
	    # NOTE(review): clicking the download button presumably triggers a rerun
	    # in which the "Translate All" button reads False, hiding these results -
	    # confirm, and persist them in st.session_state if that matters.
	    st.download_button(
	        label="📥 Download Results as CSV",
	        data=_results_to_csv(results),
	        file_name="samaritan_translations.csv",
	        mime="text/csv",
	    )


	def _render_footer():
	    """Draw the closing rule and the centered credits block."""
	    st.markdown("---")
	    st.markdown("""
	<div style="text-align: center; color: #666; padding: 2rem;">
	<p>Built with ❤️ using Streamlit and Hugging Face Transformers</p>
	<p>Samaritan Hebrew to Samaritan Aramaic Translation</p>
	<p>Model: johnlockejrr/marianmt-he2arc-sam</p>
	</div>
	""", unsafe_allow_html=True)


	def main():
	    """Render the whole page.

	    Order: header, model loading (stops with an error message if loading
	    failed), settings sidebar, single-text translation, batch translation
	    from an uploaded .txt file, footer.
	    """
	    # Header
	    st.markdown('<h1 class="main-header">📚 Samaritan Hebrew-Aramaic Translator</h1>', unsafe_allow_html=True)
	    st.markdown('<p class="sub-header">Powered by the johnlockejrr/marianmt-he2arc-sam model</p>', unsafe_allow_html=True)

	    # Load model
	    tokenizer, model, device = load_model()

	    if tokenizer is None or model is None:
	        st.error("Failed to load the translation model. Please check your internet connection and try again.")
	        return

	    # Sidebar for settings
	    max_length = _render_sidebar(tokenizer, model, device)

	    # Main content area
	    _render_single_translation(tokenizer, model, device, max_length)

	    # Additional features
	    st.markdown("---")
	    _render_batch_translation(tokenizer, model, device, max_length)

	    # Footer
	    _render_footer()

	# Call main() only when this module is executed as the main script,
	# not when it is imported.
	if __name__ == "__main__":
	    main()