hackerbyhobby
committed on
changes
Browse files
app.py
CHANGED
|
@@ -10,6 +10,7 @@ import requests
|
|
| 10 |
import json
|
| 11 |
import os
|
| 12 |
import numpy as np
|
|
|
|
| 13 |
|
| 14 |
# Translator instance
|
| 15 |
translator = GoogleTranslator(source="auto", target="es")
|
|
@@ -26,17 +27,8 @@ model_name = "joeddav/xlm-roberta-large-xnli"
|
|
| 26 |
classifier = pipeline("zero-shot-classification", model=model_name)
|
| 27 |
CANDIDATE_LABELS = ["SMiShing", "Other Scam", "Legitimate"]
|
| 28 |
|
| 29 |
-
# 3. SHAP
|
| 30 |
-
|
| 31 |
-
original_invariants = shap.maskers._text.Text.invariants
|
| 32 |
-
|
| 33 |
-
def patched_invariants(self, *args):
|
| 34 |
-
return np.zeros(len(self._tokenized_s), dtype=np.bool_) # Use np.bool_ instead
|
| 35 |
-
|
| 36 |
-
shap.maskers._text.Text.invariants = patched_invariants
|
| 37 |
-
|
| 38 |
-
# SHAP explainer setup
|
| 39 |
-
explainer = shap.Explainer(classifier)
|
| 40 |
|
| 41 |
# Retrieve the Google Safe Browsing API key from the environment
|
| 42 |
SAFE_BROWSING_API_KEY = os.getenv("SAFE_BROWSING_API_KEY")
|
|
@@ -147,6 +139,9 @@ def explain_classification(text):
|
|
| 147 |
"""
|
| 148 |
Generate SHAP explanations for the classification.
|
| 149 |
"""
|
|
|
|
|
|
|
|
|
|
| 150 |
shap_values = explainer([text])
|
| 151 |
shap.force_plot(
|
| 152 |
explainer.expected_value[0], shap_values[0].values[0], shap_values[0].data
|
|
@@ -218,23 +213,4 @@ demo = gr.Interface(
|
|
| 218 |
inputs=[
|
| 219 |
gr.Textbox(
|
| 220 |
lines=3,
|
| 221 |
-
label="Paste
|
| 222 |
-
placeholder="Type or paste the message here..."
|
| 223 |
-
),
|
| 224 |
-
gr.Image(
|
| 225 |
-
type="pil",
|
| 226 |
-
label="Or Upload a Screenshot (Optional)"
|
| 227 |
-
)
|
| 228 |
-
],
|
| 229 |
-
outputs="json",
|
| 230 |
-
title="SMiShing & Scam Detector with Safe Browsing",
|
| 231 |
-
description="""
|
| 232 |
-
This tool classifies messages as SMiShing, Other Scam, or Legitimate using a zero-shot model
|
| 233 |
-
(joeddav/xlm-roberta-large-xnli). It automatically detects if the text is Spanish or English.
|
| 234 |
-
It uses SHAP for explainability and checks URLs against Google's Safe Browsing API for enhanced analysis.
|
| 235 |
-
""",
|
| 236 |
-
flagging_mode="never"
|
| 237 |
-
)
|
| 238 |
-
|
| 239 |
-
if __name__ == "__main__":
|
| 240 |
-
demo.launch()
|
|
|
|
| 10 |
import json
|
| 11 |
import os
|
| 12 |
import numpy as np
|
| 13 |
+
from shap.maskers import Text
|
| 14 |
|
| 15 |
# Translator instance
|
| 16 |
translator = GoogleTranslator(source="auto", target="es")
|
|
|
|
| 27 |
classifier = pipeline("zero-shot-classification", model=model_name)
|
| 28 |
CANDIDATE_LABELS = ["SMiShing", "Other Scam", "Legitimate"]
|
| 29 |
|
| 30 |
+
# 3. SHAP Explainer Setup
|
| 31 |
+
explainer = shap.Explainer(classifier, masker=Text(tokenizer=classifier.tokenizer))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
# Retrieve the Google Safe Browsing API key from the environment
|
| 34 |
SAFE_BROWSING_API_KEY = os.getenv("SAFE_BROWSING_API_KEY")
|
|
|
|
| 139 |
"""
|
| 140 |
Generate SHAP explanations for the classification.
|
| 141 |
"""
|
| 142 |
+
if not text.strip():
|
| 143 |
+
raise ValueError("Cannot generate SHAP explanations for empty text.")
|
| 144 |
+
|
| 145 |
shap_values = explainer([text])
|
| 146 |
shap.force_plot(
|
| 147 |
explainer.expected_value[0], shap_values[0].values[0], shap_values[0].data
|
|
|
|
| 213 |
inputs=[
|
| 214 |
gr.Textbox(
|
| 215 |
lines=3,
|
| 216 |
+
label="Paste
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|