Spaces:
Running
Running
Aymeric Roucher
committed on
Commit
·
db61c57
1
Parent(s):
37c61d6
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,15 +1,33 @@
|
|
| 1 |
import gradio as gr
|
|
|
|
|
|
|
| 2 |
|
| 3 |
-
|
| 4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
iface = gr.Interface(
|
| 7 |
-
fn=
|
| 8 |
inputs="text",
|
| 9 |
outputs=gr.HighlightedText(
|
| 10 |
-
label="
|
| 11 |
-
combine_adjacent=
|
| 12 |
show_legend=True,
|
| 13 |
-
color_map={"
|
| 14 |
)
|
| 15 |
iface.launch()
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
from haystack.nodes import PreProcessor
|
| 3 |
+
from haystack import Document
|
| 4 |
|
| 5 |
+
# Module-level Haystack preprocessor used by the chunking callback.
# It cleans the input (empty lines, whitespace, repeated headers/footers) and
# splits by word with a target length of 200, no overlap between chunks, and
# sentence boundaries respected.
preprocessor = PreProcessor(
    clean_empty_lines=True,
    clean_whitespace=True,
    clean_header_footer=True,
    remove_substrings=None,
    split_by="word",
    split_length=200,
    split_respect_sentence_boundary=True,
    split_overlap=0,
    # Passed as a plain keyword argument: an annotated form such as
    # `name: int = value` is only valid in a signature, not in a call.
    max_chars_check=10_000,
)
|
| 16 |
+
|
| 17 |
+
def chunk(text):
    """Split ``text`` into chunks and label them for ``gr.HighlightedText``.

    Args:
        text: Raw string typed into the Gradio text input.

    Returns:
        A list of ``(chunk_text, label)`` tuples. ``label`` cycles through
        ``"0"``, ``"1"``, ``"2"`` so consecutive chunks get different labels.
        An empty list is returned for empty or whitespace-only input.
    """
    # Nothing to split: skip the preprocessor entirely.
    if not text or not text.strip():
        return []

    splits = preprocessor.process(Document(text))

    # HighlightedText consumes (text, label) pairs, in that order. The label
    # is stringified so it matches the string keys of the component's
    # color_map ("0", "1", "2").
    return [(split.content, str(i % 3)) for i, split in enumerate(splits)]
|
| 23 |
|
| 24 |
# Output component: renders each chunk as a highlighted span, with a legend
# and one color per label.
highlighted_chunks = gr.HighlightedText(
    label="Highlights",
    combine_adjacent=False,
    show_legend=True,
    color_map={"0": "red", "1": "green", "2": "yellow"},
)

# Wire the text input through `chunk` into the highlighted output and start
# the app.
iface = gr.Interface(fn=chunk, inputs="text", outputs=highlighted_chunks)
iface.launch()
|