Spaces: Build error
Merge branch 'main' of https://huggingface.co/spaces/CarperAI/pile-v2-eda
Browse files
app.py
CHANGED
|
@@ -14,7 +14,7 @@ else:
|
|
| 14 |
contribution_json = "contributors.json"
|
| 15 |
|
| 16 |
contribution_dict = json.load(open(contribution_json,"r"))
|
| 17 |
-
IGNORE_LIST = ["Bible","Tanzil",""]
|
| 18 |
|
| 19 |
splits = [split for split in os.listdir(CACHE_DIR) if split not in IGNORE_LIST]
|
| 20 |
|
|
@@ -44,17 +44,18 @@ def load_page(split):
|
|
| 44 |
meta = data["meta"]
|
| 45 |
with st.expander("Render Content"):
|
| 46 |
st.write(content)
|
| 47 |
-
st.
|
| 48 |
-
|
| 49 |
-
st.
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
|
|
|
| 54 |
#Word related count
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
|
| 59 |
|
| 60 |
demo_name = st.sidebar.selectbox("Choose a demo", splits)
|
|
|
|
| 14 |
contribution_json = "contributors.json"
|
| 15 |
|
| 16 |
contribution_dict = json.load(open(contribution_json,"r"))
|
| 17 |
+
IGNORE_LIST = ["Bible","Tanzil","GNOME"]
|
| 18 |
|
| 19 |
splits = [split for split in os.listdir(CACHE_DIR) if split not in IGNORE_LIST]
|
| 20 |
|
|
|
|
| 44 |
meta = data["meta"]
|
| 45 |
with st.expander("Render Content"):
|
| 46 |
st.write(content)
|
| 47 |
+
with st.expander("Raw Content"):
|
| 48 |
+
st.text(content)
|
| 49 |
+
with st.expander("Metadata and Metrics"):
|
| 50 |
+
st.write("### Meta:")
|
| 51 |
+
st.write(ast.literal_eval(meta))
|
| 52 |
+
# Tokenizer-related count
|
| 53 |
+
tokenized = tokenizer(content, return_length=True)['length'][0]
|
| 54 |
+
token_count_metric = st.metric("Token Count(compared to 2048)",value=tokenized,delta=4096-tokenized)
|
| 55 |
#Word related count
|
| 56 |
+
split_words = re.findall(r'\w+', content)
|
| 57 |
+
word_count_metric = st.metric("Word Count",value=len(split_words))
|
| 58 |
+
|
| 59 |
|
| 60 |
|
| 61 |
demo_name = st.sidebar.selectbox("Choose a demo", splits)
|