wxy01giser committed on
Commit
c695e91
·
verified ·
1 Parent(s): 44b8192

Update docSim.py

Browse files
Files changed (1) hide show
  1. docSim.py +10 -0
docSim.py CHANGED
@@ -19,6 +19,16 @@ def semantic_similarity(text1, text2):
19
  emb2 = MODEL.encode(text2, normalize_embeddings=True)
20
  return float(util.cos_sim(emb1, emb2))
21
 
 
 
 
 
 
 
 
 
 
 
22
  def calcDocSims(file):
23
  texts = re.findall(r'text=(.*?),\s*error=', file, flags=re.DOTALL)
24
  res_list = [extract_core_painpoints(t) for t in texts]
 
19
  emb2 = MODEL.encode(text2, normalize_embeddings=True)
20
  return float(util.cos_sim(emb1, emb2))
21
 
22
def extract_core_painpoints(text: str) -> str:
    """Return the body of the "核心痛点" (core pain points) section of *text*.

    The section is everything between the "核心痛点" heading and the next
    "优化措施" heading. Colons and whitespace directly after the heading are
    skipped, and per-item labels of the form "- 核心教学痛点<N>:" are removed
    from the extracted body.

    Args:
        text: The document text to search.

    Returns:
        The stripped section body with item labels removed, or an empty
        string when the two headings do not occur in that order.
    """
    # Chinese documents usually put a full-width colon (U+FF1A) after a
    # heading. It is written as an escape so the pattern survives editors or
    # pipelines that normalise it to ASCII ':' (which would let the
    # full-width colon leak into the captured text).
    section_re = r"核心痛点[:\uff1a\s]*([\s\S]*?)优化措施"
    m = re.search(section_re, text)
    if not m:
        return ""

    body = m.group(1).strip()
    # Strip the numbering labels, e.g. "- 核心教学痛点1:".
    return re.sub(r"-?\s*核心教学痛点\d*[:\uff1a]\s*", "", body)
31
+
32
  def calcDocSims(file):
33
  texts = re.findall(r'text=(.*?),\s*error=', file, flags=re.DOTALL)
34
  res_list = [extract_core_painpoints(t) for t in texts]