Spaces:
Sleeping
Sleeping
Upload 2 files
Browse files
app.py
CHANGED
|
@@ -1,82 +1,50 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
-
import csv
|
| 3 |
|
| 4 |
-
#
|
| 5 |
-
LEXICON =
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
LEXICON.add(row[0].strip())
|
| 10 |
|
| 11 |
-
# ---
|
| 12 |
REVERSE_SANDHI_RULES = [
|
| 13 |
-
("ा", ["अ+अ"]),
|
| 14 |
-
("े", ["अ+इ", "अ+ई"]), # e
|
| 15 |
-
("ो", ["अ+उ", "अ+ऊ"]), # o
|
| 16 |
-
("त्त", ["त्+त", "त्+द"]),
|
| 17 |
("ः", ["ः+"]), # visarga restoration
|
| 18 |
-
("ं", ["म्+", "न्+"]), # anusvara restoration
|
| 19 |
]
|
| 20 |
|
| 21 |
def generate_candidates(word):
|
| 22 |
candidates = []
|
| 23 |
for i in range(1, len(word)):
|
| 24 |
left, right = word[:i], word[i:]
|
| 25 |
-
|
| 26 |
# Direct split
|
| 27 |
if left in LEXICON and right in LEXICON:
|
| 28 |
candidates.append((left, right))
|
| 29 |
-
|
| 30 |
-
# Rule-based reverse sandhi
|
| 31 |
for ch, expansions in REVERSE_SANDHI_RULES:
|
| 32 |
if left.endswith(ch):
|
| 33 |
for exp in expansions:
|
| 34 |
-
l_base = left[:-
|
| 35 |
r_base = exp.split("+")[1] + right
|
| 36 |
if l_base in LEXICON and r_base in LEXICON:
|
| 37 |
candidates.append((l_base, r_base))
|
| 38 |
-
|
| 39 |
candidates = list(set(candidates))
|
| 40 |
return candidates or [("No plausible split found", "")]
|
| 41 |
|
| 42 |
def sandhi_splitter(word):
|
| 43 |
-
|
| 44 |
-
if not word:
|
| 45 |
-
return "Please enter a word."
|
| 46 |
-
|
| 47 |
-
candidates = generate_candidates(word)
|
| 48 |
formatted = [" + ".join(c) for c in candidates]
|
| 49 |
return "\n".join(formatted)
|
| 50 |
|
| 51 |
-
# --- Gradio App ---
|
| 52 |
with gr.Blocks() as demo:
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
"2. Click **Split Sandhi** to see possible splits.\n"
|
| 60 |
-
"3. Candidate splits are based on a small dictionary and reverse sandhi rules.\n\n"
|
| 61 |
-
"**Contact:** For issues, mail **[email protected]**"
|
| 62 |
-
)
|
| 63 |
-
inp = gr.Textbox(label="Compound Word (e.g. धर्मक्षेत्रे)")
|
| 64 |
-
btn = gr.Button("Split Sandhi")
|
| 65 |
-
out = gr.Textbox(label="Candidate Splits", lines=5)
|
| 66 |
-
|
| 67 |
-
btn.click(fn=sandhi_splitter, inputs=inp, outputs=out)
|
| 68 |
-
|
| 69 |
-
with gr.Column(scale=1):
|
| 70 |
-
gr.Markdown(
|
| 71 |
-
"### How to Use This Tool\n"
|
| 72 |
-
"- Input any Sanskrit **compound** word.\n"
|
| 73 |
-
"- Works best with **Devanagari script**.\n"
|
| 74 |
-
"- Multiple possible splits may appear.\n\n"
|
| 75 |
-
"### Notes\n"
|
| 76 |
-
"- Uses **rule-based splitting** + lexicon check.\n"
|
| 77 |
-
"- Starter lexicon included (~20 entries); extend with full MW dictionary for accuracy.\n\n"
|
| 78 |
-
"**Support:** [email protected]"
|
| 79 |
-
)
|
| 80 |
|
| 81 |
if __name__ == "__main__":
|
| 82 |
demo.launch()
|
|
|
|
| 1 |
import gradio as gr
|
|
|
|
| 2 |
|
| 3 |
+
# --- Minimal Sanskrit lexicon (extend with real data) ---
# Set of Devanagari word forms accepted as valid halves of a split.
# Membership is tested by exact string equality.
LEXICON = {
    "राम",
    "वन",
    "गच्छति",
    "गुरु",
    "इन्द्र",
    "तत्",
    "अपि",
    "धर्म",
    "क्षेत्र",
    "कुरु",
    "क्षेत्रे",
}
|
|
|
|
| 8 |
|
| 9 |
+
# --- Basic Reverse Sandhi Rules ---
# Each rule is (surface_ending, expansions). Every expansion is a string
# "LEFT+RIGHT": LEFT is the material restored on the left-hand word and
# RIGHT is the material prefixed to the right-hand word.
REVERSE_SANDHI_RULES = [
    # ā → a + a
    ("ा", ["अ+अ"]),
    # e → a + i  or  a + ī
    ("े", ["अ+इ", "अ+ई"]),
    # o → a + u  or  a + ū
    ("ो", ["अ+उ", "अ+ऊ"]),
    # visarga restoration (nothing is prefixed to the right-hand word)
    ("ः", ["ः+"]),
]
|
| 16 |
|
| 17 |
def generate_candidates(word, *, lexicon=None, rules=None):
    """Return candidate two-part splits of ``word``.

    Every internal split position is tried twice: once as a plain cut, and
    once per reverse-sandhi rule whose surface ending matches the end of the
    left half. A candidate is kept only when both halves are in the lexicon.

    Args:
        word: The compound to split (Devanagari string).
        lexicon: Optional container of valid word forms. Defaults to the
            module-level ``LEXICON``.
        rules: Optional iterable of ``(surface_ending, expansions)`` pairs,
            where each expansion is a ``"LEFT+RIGHT"`` string. Defaults to
            the module-level ``REVERSE_SANDHI_RULES``.

    Returns:
        A list of ``(left, right)`` tuples in discovery order without
        duplicates, or ``[("No plausible split found", "")]`` when nothing
        matches.
    """
    lexicon = LEXICON if lexicon is None else lexicon
    rules = REVERSE_SANDHI_RULES if rules is None else rules

    # A Devanagari consonant already carries the inherent vowel "a", so an
    # expansion whose left part is the independent vowel "अ" must not append
    # that letter: removing the vowel sign alone restores the a-ending stem
    # (e.g. "रामे" -> "राम", not "रामअ").
    inherent_a = "अ"

    candidates = []
    for cut in range(1, len(word)):
        left, right = word[:cut], word[cut:]

        # Direct split: both halves are words exactly as written.
        if left in lexicon and right in lexicon:
            candidates.append((left, right))

        # Reverse sandhi: undo a fused ending on the left half.
        for ending, expansions in rules:
            if not ending or not left.endswith(ending):
                continue
            # Strip the whole ending, which may span several code points.
            stem = left[: -len(ending)]
            for expansion in expansions:
                left_add, _, right_add = expansion.partition("+")
                l_base = stem if left_add == inherent_a else stem + left_add
                r_base = right_add + right
                if l_base in lexicon and r_base in lexicon:
                    candidates.append((l_base, r_base))

    # De-duplicate while keeping discovery order so the output is stable
    # from run to run (a plain set has no defined iteration order).
    unique = list(dict.fromkeys(candidates))
    return unique or [("No plausible split found", "")]
|
| 35 |
|
| 36 |
def sandhi_splitter(word):
    """Format the candidate splits of ``word`` for display.

    Args:
        word: Text entered by the user; may be ``None`` or blank.

    Returns:
        ``"Please enter a word."`` for missing or whitespace-only input;
        otherwise one line per candidate, with the parts joined by ``" + "``.
    """
    cleaned = word.strip() if word else ""
    if not cleaned:
        return "Please enter a word."

    candidates = generate_candidates(cleaned)
    # Skip empty parts so the "no split" placeholder, whose second element
    # is "", is shown without a dangling " + " after it.
    lines = [" + ".join(part for part in pair if part) for pair in candidates]
    return "\n".join(lines)
|
| 40 |
|
|
|
|
| 41 |
# --- Gradio UI ---
# Components are created in display order: heading, instructions, input box,
# output box, then the button that triggers the split.
with gr.Blocks() as demo:
    gr.Markdown("## Sanskrit Sandhi-Splitter (Prototype)")
    gr.Markdown("Enter a Sanskrit compound word (Devanagari) to see possible splits.")

    inp = gr.Textbox(label="Compound Word (e.g. धर्मक्षेत्रे)")
    out = gr.Textbox(label="Candidate Splits")
    btn = gr.Button("Split Sandhi")

    # Clicking the button passes the input text to sandhi_splitter and
    # writes its return value into the output box.
    btn.click(fn=sandhi_splitter, inputs=inp, outputs=out)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|