Spaces:
Build error
Build error
danseith
committed on
Commit
·
85ace6b
1
Parent(s):
1ca245c
Won't count iteration if no changes are made.
Browse files
app.py
CHANGED
|
@@ -30,9 +30,10 @@ def add_mask(text, size=1):
|
|
| 30 |
if '[MASK]' in split_text:
|
| 31 |
return text
|
| 32 |
idx = np.random.randint(len(split_text), size=size)
|
|
|
|
| 33 |
for i in idx:
|
| 34 |
split_text[i] = '[MASK]'
|
| 35 |
-
return ' '.join(split_text)
|
| 36 |
|
| 37 |
|
| 38 |
class TempScalePipe(FillMaskPipeline):
|
|
@@ -135,7 +136,7 @@ scrambler = pipeline("temp-scale", model="anferico/bert-for-patents")
|
|
| 135 |
def unmask(text, temp, rounds):
|
| 136 |
sampling = 'multi'
|
| 137 |
for round in range(rounds):
|
| 138 |
-
text = add_mask(text, size=1)
|
| 139 |
split_text = text.split()
|
| 140 |
res = scrambler(text, temp=temp, top_k=10)
|
| 141 |
mask_pos = [i for i, t in enumerate(split_text) if 'MASK' in t][0]
|
|
@@ -148,7 +149,7 @@ def unmask(text, temp, rounds):
|
|
| 148 |
idx = np.random.randint(0, len(score_list))
|
| 149 |
score = score_list[idx]
|
| 150 |
new_token = score_to_str[score]
|
| 151 |
-
if len(list(new_token)) < 2:
|
| 152 |
continue
|
| 153 |
split_text[mask_pos] = '*' + new_token + '*'
|
| 154 |
text = ' '.join(split_text)
|
|
|
|
| 30 |
if '[MASK]' in split_text:
|
| 31 |
return text
|
| 32 |
idx = np.random.randint(len(split_text), size=size)
|
| 33 |
+
masked = split_text[idx]
|
| 34 |
for i in idx:
|
| 35 |
split_text[i] = '[MASK]'
|
| 36 |
+
return ' '.join(split_text), masked
|
| 37 |
|
| 38 |
|
| 39 |
class TempScalePipe(FillMaskPipeline):
|
|
|
|
| 136 |
def unmask(text, temp, rounds):
|
| 137 |
sampling = 'multi'
|
| 138 |
for round in range(rounds):
|
| 139 |
+
text, masked = add_mask(text, size=1)
|
| 140 |
split_text = text.split()
|
| 141 |
res = scrambler(text, temp=temp, top_k=10)
|
| 142 |
mask_pos = [i for i, t in enumerate(split_text) if 'MASK' in t][0]
|
|
|
|
| 149 |
idx = np.random.randint(0, len(score_list))
|
| 150 |
score = score_list[idx]
|
| 151 |
new_token = score_to_str[score]
|
| 152 |
+
if len(list(new_token)) < 2 or new_token == masked:
|
| 153 |
continue
|
| 154 |
split_text[mask_pos] = '*' + new_token + '*'
|
| 155 |
text = ' '.join(split_text)
|