Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import torch | |
| from transformers import AutoTokenizer, AutoModelForSeq2SeqLM | |
| import time | |
| from typing import Optional | |
| import json | |
# Page configuration: browser-tab title/icon, wide layout, sidebar open on load.
_page_settings = {
    "page_title": "Samaritan Hebrew to Samaritan Targumic Aramaic Translation",
    "page_icon": "π",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_page_settings)
# Custom CSS for modern styling. The stylesheet is kept in one string and
# injected as raw HTML so the classes below can be used by later markup.
_custom_css = """
<style>
.main-header {
font-size: 3rem;
font-weight: 700;
background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
text-align: center;
margin-bottom: 2rem;
}
.sub-header {
font-size: 1.2rem;
color: #666;
text-align: center;
margin-bottom: 3rem;
}
.translation-box {
background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
padding: 2rem;
border-radius: 15px;
box-shadow: 0 8px 32px rgba(0,0,0,0.1);
margin: 1rem 0;
}
.input-area {
background: white;
border-radius: 10px;
padding: 1.5rem;
box-shadow: 0 4px 16px rgba(0,0,0,0.05);
}
.output-area {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
border-radius: 10px;
padding: 1.5rem;
box-shadow: 0 4px 16px rgba(0,0,0,0.1);
}
.direction-selector {
background: white;
border-radius: 10px;
padding: 1rem;
box-shadow: 0 4px 16px rgba(0,0,0,0.05);
margin-bottom: 1rem;
}
.stButton > button {
background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
color: white;
border: none;
border-radius: 25px;
padding: 0.75rem 2rem;
font-weight: 600;
transition: all 0.3s ease;
}
.stButton > button:hover {
transform: translateY(-2px);
box-shadow: 0 8px 25px rgba(102, 126, 234, 0.4);
}
.model-info {
background: #f8f9fa;
border-radius: 10px;
padding: 1rem;
margin: 1rem 0;
border-left: 4px solid #667eea;
}
</style>
"""
st.markdown(_custom_css, unsafe_allow_html=True)
@st.cache_resource(show_spinner="Loading translation model...")
def _load_model_cached(model_name: str):
    """Load tokenizer and model for *model_name* once and cache the result.

    Streamlit re-executes the whole script on every widget interaction; the
    cache decorator keeps the loaded objects alive across those reruns so the
    weights are not reloaded each time. Exceptions are allowed to propagate:
    a raised exception is not stored in the cache, so a failed load can be
    retried on the next rerun instead of being remembered as a failure.

    Returns:
        A ``(tokenizer, model, device)`` tuple, where ``device`` is the
        string ``"cuda"`` or ``"cpu"``.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

    # Move to GPU if available, and switch to inference mode.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)
    model.eval()
    return tokenizer, model, device


def load_model():
    """Load the Hugging Face model and tokenizer with caching.

    Returns:
        ``(tokenizer, model, device)`` on success. If loading raises, the
        error is shown in the page via ``st.error`` and
        ``(None, None, None)`` is returned so the caller can stop cleanly.
    """
    model_name = "johnlockejrr/marianmt-he2arc-sam"
    try:
        return _load_model_cached(model_name)
    except Exception as e:
        # Deliberately broad: this is the UI boundary, and any load failure
        # (network, missing files, out-of-memory) should become a message on
        # the page rather than a stack trace.
        st.error(f"Error loading model: {str(e)}")
        return None, None, None
def translate_text(text: str, direction: str, tokenizer, model, device: str, max_length: int = 512) -> Optional[str]:
    """Run *text* through the loaded seq2seq model and return the decoded output.

    Args:
        text: Source text. Blank or whitespace-only input yields ``None``.
        direction: ``"Hebrew to Aramaic"`` selects the ``>>heb<<`` tag; any
            other value selects ``>>arc<<``.
        tokenizer: Tokenizer used to encode the input and decode the output.
        model: Model exposing ``generate``.
        device: Device string the encoded inputs are moved to.
        max_length: Used both as the truncation limit for the input and as
            the generation length limit.

    Returns:
        The decoded translation, or ``None`` if the input was blank or an
        exception occurred (the exception text is shown with ``st.error``).
    """
    if not text.strip():
        return None

    try:
        # The model input is prefixed with a language tag chosen by direction.
        language_tag = ">>heb<<" if direction == "Hebrew to Aramaic" else ">>arc<<"
        tagged_text = f"{language_tag} {text}"

        encoded = tokenizer(
            tagged_text,
            return_tensors="pt",
            max_length=max_length,
            truncation=True,
            padding=True,
        )
        encoded = encoded.to(device)

        # Deterministic beam search; no gradients are needed for inference.
        generation_options = {
            "max_length": max_length,
            "num_beams": 4,
            "length_penalty": 0.6,
            "early_stopping": True,
            "do_sample": False,
        }
        with torch.no_grad():
            generated = model.generate(**encoded, **generation_options)

        return tokenizer.decode(generated[0], skip_special_tokens=True)
    except Exception as exc:
        st.error(f"Translation error: {str(exc)}")
        return None
# Session-state keys used by the page. Kept in one place so the "Clear All"
# callback and the widgets that own the state cannot drift apart.
_INPUT_TEXT_KEY = "single_input_text"
_BATCH_RESULTS_KEY = "batch_results"
_UPLOADER_NONCE_KEY = "batch_uploader_nonce"


def _clear_all():
    """Button callback: reset the text box, uploaded file and batch results."""
    st.session_state[_INPUT_TEXT_KEY] = ""
    if _BATCH_RESULTS_KEY in st.session_state:
        del st.session_state[_BATCH_RESULTS_KEY]
    # A file uploader cannot be emptied by assigning to its state; giving it a
    # new key on the next run makes Streamlit create a fresh, empty widget.
    st.session_state[_UPLOADER_NONCE_KEY] = st.session_state.get(_UPLOADER_NONCE_KEY, 0) + 1


def _results_to_csv(results):
    """Serialize batch results to CSV text with proper quoting/escaping."""
    import csv
    import io

    buffer = io.StringIO()
    writer = csv.writer(buffer, lineterminator="\n")
    writer.writerow(["Samaritan Hebrew", "Samaritan Aramaic"])
    writer.writerows((row["original"], row["translation"]) for row in results)
    return buffer.getvalue()


def _render_sidebar(tokenizer, model, device):
    """Draw the settings sidebar and return the chosen maximum output length."""
    with st.sidebar:
        st.markdown("### βοΈ Settings")

        max_length = st.slider(
            "Maximum Output Length",
            min_value=64,
            max_value=512,
            value=256,
            step=32,
            help="Maximum length of the generated translation",
        )

        st.markdown("### π Model Information")
        st.markdown("**Model:** johnlockejrr/marianmt-he2arc-sam")
        st.markdown(f"**Device:** {device.upper()}")
        st.markdown(f"**Tokenizer:** {tokenizer.__class__.__name__}")
        st.markdown(f"**Model Type:** {model.__class__.__name__}")
        st.markdown("**Direction:** Samaritan Hebrew β Samaritan Aramaic")

        # The callback runs before the next script run, so no explicit
        # st.rerun() is needed after it.
        st.button("ποΈ Clear All", on_click=_clear_all)
    return max_length


def _render_single_translation(tokenizer, model, device, max_length):
    """Draw the two-column input/output area for translating one text."""
    col1, col2 = st.columns([1, 1])

    with col1:
        st.markdown('<div class="input-area">', unsafe_allow_html=True)
        st.markdown("### π Input Text")
        input_text = st.text_area(
            "Enter Samaritan Hebrew text to translate",
            height=200,
            placeholder="Enter your Samaritan Hebrew text here...",
            help="Type or paste the Samaritan Hebrew text you want to translate to Samaritan Aramaic",
            key=_INPUT_TEXT_KEY,
        )
        translate_button = st.button(
            "π Translate to Samaritan Aramaic",
            type="primary",
            use_container_width=True,
        )
        st.markdown('</div>', unsafe_allow_html=True)

    with col2:
        st.markdown('<div class="output-area">', unsafe_allow_html=True)
        st.markdown("### π― Samaritan Aramaic Translation")
        if translate_button and input_text.strip():
            with st.spinner("Translating to Samaritan Aramaic..."):
                # Add a small delay for better UX
                time.sleep(0.5)
                translation = translate_text(
                    input_text,
                    "Hebrew to Aramaic",
                    tokenizer,
                    model,
                    device,
                    max_length,
                )
            if translation:
                st.markdown("**Samaritan Aramaic:**")
                # Display translation in a code block that can be easily copied
                st.code(translation, language=None)
            else:
                st.error("Translation failed. Please try again.")
        else:
            st.markdown("*Samaritan Aramaic translation will appear here*")
        st.markdown('</div>', unsafe_allow_html=True)


def _translate_lines(lines, tokenizer, model, device, max_length):
    """Translate each line, updating a progress bar, and return result dicts."""
    progress_bar = st.progress(0)
    status_text = st.empty()
    total = len(lines)
    results = []
    for position, line in enumerate(lines, start=1):
        status_text.text(f"Translating line {position}/{total}: {line[:50]}...")
        translation = translate_text(
            line,
            "Hebrew to Aramaic",
            tokenizer,
            model,
            device,
            max_length,
        )
        results.append({
            'original': line,
            'translation': translation or "Translation failed",
        })
        progress_bar.progress(position / total)
    status_text.text("β Translation complete!")
    return results


def _render_batch_translation(tokenizer, model, device, max_length):
    """Draw the file-upload section that translates a text file line by line."""
    import hashlib

    st.markdown("### π Batch Translation")
    st.markdown("Upload a text file with multiple Samaritan Hebrew lines to translate them all to Samaritan Aramaic.")

    uploader_nonce = st.session_state.get(_UPLOADER_NONCE_KEY, 0)
    uploaded_file = st.file_uploader(
        "Choose a text file",
        type=['txt'],
        help="Upload a .txt file with one Samaritan Hebrew text per line",
        key=f"batch_uploader_{uploader_nonce}",
    )
    if uploaded_file is None:
        return

    # getvalue() returns the whole buffer regardless of the current read
    # position; "utf-8-sig" drops a leading BOM so it does not end up glued
    # to the first line that is sent to the model.
    raw_bytes = uploaded_file.getvalue()
    try:
        content = raw_bytes.decode('utf-8-sig')
    except UnicodeDecodeError as e:
        st.error(f"Error reading file: {str(e)}")
        return

    lines = [line.strip() for line in content.split('\n') if line.strip()]
    if not lines:
        st.warning("The uploaded file contains no text lines to translate.")
        return

    st.success(f"π Loaded {len(lines)} lines from {uploaded_file.name}")

    # Results are remembered per uploaded file. A button only reports True on
    # the run right after its click, so without this the results (and the
    # download button) would vanish on the rerun triggered by any other
    # widget, including the download button itself.
    file_id = (uploaded_file.name, hashlib.sha256(raw_bytes).hexdigest())
    translate_clicked = st.button("π Translate All to Samaritan Aramaic", type="primary")
    stored = st.session_state.get(_BATCH_RESULTS_KEY)
    has_stored_results = stored is not None and stored["file_id"] == file_id
    if not (translate_clicked or has_stored_results):
        return

    st.markdown("### π Batch Translation Results")
    if translate_clicked:
        results = _translate_lines(lines, tokenizer, model, device, max_length)
        st.session_state[_BATCH_RESULTS_KEY] = {"file_id": file_id, "results": results}
    else:
        results = stored["results"]

    for i, result in enumerate(results):
        with st.expander(f"Line {i+1}: {result['original'][:50]}..."):
            st.markdown(f"**Samaritan Hebrew:** {result['original']}")
            st.markdown(f"**Samaritan Aramaic:** {result['translation']}")

    st.download_button(
        label="π₯ Download Results as CSV",
        data=_results_to_csv(results),
        file_name="samaritan_translations.csv",
        mime="text/csv",
    )


def main():
    """Render the translator page: header, sidebar, single and batch translation, footer.

    Stops after showing an error if the model or tokenizer could not be
    loaded.
    """
    # Header
    st.markdown('<h1 class="main-header">π Samaritan Hebrew-Aramaic Translator</h1>', unsafe_allow_html=True)
    st.markdown('<p class="sub-header">Powered by the johnlockejrr/marianmt-he2arc-sam model</p>', unsafe_allow_html=True)

    # Load model
    tokenizer, model, device = load_model()
    if tokenizer is None or model is None:
        st.error("Failed to load the translation model. Please check your internet connection and try again.")
        return

    max_length = _render_sidebar(tokenizer, model, device)
    _render_single_translation(tokenizer, model, device, max_length)

    st.markdown("---")
    _render_batch_translation(tokenizer, model, device, max_length)

    # Footer
    st.markdown("---")
    st.markdown("""
<div style="text-align: center; color: #666; padding: 2rem;">
<p>Built with β€οΈ using Streamlit and Hugging Face Transformers</p>
<p>Samaritan Hebrew to Samaritan Aramaic Translation</p>
<p>Model: johnlockejrr/marianmt-he2arc-sam</p>
</div>
""", unsafe_allow_html=True)


if __name__ == "__main__":
    main()