init
Browse files
app.py
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import pandas as pd
|
| 3 |
+
from collections import Counter
|
| 4 |
+
import re
|
| 5 |
+
|
| 6 |
+
def ana(txt):
    """Analyze text and return comprehensive stats in dataframes.

    Args:
        txt: Raw input text to analyze.

    Returns:
        A 6-tuple matching the six bound Gradio outputs, in order:
        (overall stats, word frequency, bi-grams, tri-grams,
        sentence analysis, character distribution). For blank input the
        first slot is a one-row message DataFrame and the other five are
        None.
    """
    if not txt.strip():
        # BUG FIX: the original returned only 5 values here while the
        # success path (and the 6 outputs wired in btn.click) require 6 —
        # Gradio would error with "too few output values". Also return a
        # DataFrame so the bound Dataframe component can render the message.
        msg = pd.DataFrame({'📊 Metric': ['Message'], '📈 Value': ['Enter text!']})
        return msg, None, None, None, None, None

    wds = txt.split()
    chs = len(txt)
    wdc = len(wds)
    sns = [s.strip() for s in re.split(r'[.!?]+', txt) if s.strip()]
    snc = len(sns)
    avg = wdc / max(1, snc)   # avg words per sentence (guarded against 0 sentences)
    rdm = wdc / 200           # reading time in minutes at ~200 wpm
    # Rough syllable count: runs of vowels per word, minimum 1 per word.
    syl = sum(max(1, len(re.findall(r'[aeiouy]+', w.lower()))) for w in wds)
    fre = 206.835 - 1.015 * avg - 84.6 * (syl / max(1, wdc))  # Flesch Reading Ease
    fkg = 0.39 * avg + 11.8 * (syl / max(1, wdc)) - 15.59     # Flesch-Kincaid Grade

    # Overall statistics table (counts as ints, derived metrics formatted).
    st = pd.DataFrame({
        '📊 Metric': ['Characters', 'Words', 'Sentences', 'Avg Words/Sentence',
                      'Reading Time (min)', 'Readability Score', 'Grade Level'],
        '📈 Value': [chs, wdc, snc, f'{avg:.1f}', f'{rdm:.1f}',
                     f'{fre:.1f}', f'{fkg:.1f}']
    })

    # Word frequency (1-grams): lower-cased, edge punctuation stripped.
    wfq = Counter(w.lower().strip('.,!?;:"()[]') for w in wds if w.strip('.,!?;:"()[]'))
    wf = pd.DataFrame(wfq.most_common(15), columns=['🔤 Word', '📊 Count'])

    # Bi-grams (adjacent word pairs, lower-cased, punctuation kept as typed).
    bgr = [f"{wds[i].lower()} {wds[i+1].lower()}" for i in range(len(wds) - 1)]
    bg = pd.DataFrame(Counter(bgr).most_common(15), columns=['🔤 Bi-gram', '📊 Count'])

    # Tri-grams (adjacent word triples).
    tgr = [f"{wds[i].lower()} {wds[i+1].lower()} {wds[i+2].lower()}" for i in range(len(wds) - 2)]
    tg = pd.DataFrame(Counter(tgr).most_common(15), columns=['🔤 Tri-gram', '📊 Count'])

    # Per-sentence word counts alongside the sentence text. Explicit
    # columns= keeps the schema even when there are zero sentences
    # (e.g. text containing no sentence-terminating punctuation content).
    sls = [{'📝 #': i, '📏 Words': len(sn.split()), '💬 Sentence': sn}
           for i, sn in enumerate(sns, 1)]
    sl = pd.DataFrame(sls, columns=['📝 #', '📏 Words', '💬 Sentence'])

    # Character distribution: top 20 letters by count (most_common replaces
    # the original manual sorted(...)[:20] with identical ordering).
    chd = Counter(c for c in txt.lower() if c.isalpha())
    ch = pd.DataFrame(chd.most_common(20), columns=['🔤 Letter', '📊 Count'])

    return st, wf, bg, tg, sl, ch
|
| 63 |
+
|
| 64 |
+
# Gradio Interface
# Layout is declarative: components render in the order they are created
# inside the `with` block. Custom CSS applies a gradient background and
# white headings.
with gr.Blocks(theme=gr.themes.Soft(), css="""
.gradio-container {background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);}
h1, h3 {color: white !important;}
""") as app:

    # Page title / subtitle banner.
    gr.Markdown("# 🚀 Text Analyzer Pro\n### Comprehensive text analysis with n-grams!")

    # Free-form text input plus the button that triggers the analysis.
    inp = gr.Textbox(label="📝 Enter Your Text", placeholder="Type or paste text...", lines=8)
    btn = gr.Button("🔍 Analyze", variant="primary", size="lg")

    # Top row: overall statistics beside the per-letter distribution.
    with gr.Row():
        o1 = gr.Dataframe(label="📊 Overall Statistics", interactive=False)
        o2 = gr.Dataframe(label="🎨 Character Distribution", interactive=False)

    gr.Markdown("### 🔤 N-Gram Analysis")
    # Middle row: word / bi-gram / tri-gram frequency tables.
    with gr.Row():
        o3 = gr.Dataframe(label="1-Grams (Words)", interactive=False)
        o4 = gr.Dataframe(label="2-Grams (Phrases)", interactive=False)
        o5 = gr.Dataframe(label="3-Grams (Phrases)", interactive=False)

    gr.Markdown("### 📈 Sentence Analysis")
    # Full-width table of sentences with their word counts.
    o6 = gr.Dataframe(label="📝 Sentences with Length", interactive=False, wrap=True)

    # NOTE: outputs are deliberately NOT in o1..o6 order — they follow
    # ana's return order (st, wf, bg, tg, sl, ch): stats->o1, words->o3,
    # bi-grams->o4, tri-grams->o5, sentences->o6, characters->o2.
    btn.click(fn=ana, inputs=inp, outputs=[o1, o3, o4, o5, o6, o2])

    # Clickable sample inputs that populate the textbox.
    gr.Examples(
        examples=[
            ["The quick brown fox jumps over the lazy dog. This is a test sentence. Testing is important!"],
            ["Machine learning is transforming technology. Artificial intelligence powers innovation. Deep learning drives progress."]
        ],
        inputs=inp,
        label="💡 Try These Examples"
    )

    # Footer banner.
    gr.Markdown("---\n*Built with ❤️ using Gradio | All variables ≤ 3 chars*")

# Launch only when run as a script (not when imported, e.g. by a host
# platform that serves `app` directly).
if __name__ == "__main__":
    app.launch()
|