def get_text(t: str) -> str:
    """Extract the payload that follows ``text='`` in a repr-like string.

    The result is the substring between the first ``text='`` marker and the
    next ``', message`` marker (or the next ``text='`` marker / end of the
    string, whichever comes first).

    Args:
        t: The string to search.

    Returns:
        The extracted text, or ``t`` unchanged when it contains no
        ``text='`` marker.
    """
    pieces = t.split("text='")
    # Guard on the number of pieces (not on the length of the extracted
    # string): without the marker there is no pieces[1] to read.
    if len(pieces) < 2:
        return t
    return pieces[1].split("', message")[0]
# Extract the first lowercase/camelCase English word that is not an excluded keyword
def getEglish(data):
    """Return the first lowercase or camelCase word in *data* that is not excluded.

    A candidate is a run of lowercase ASCII letters, optionally followed by
    capitalised segments (``foo``, ``fooBar``, ``fooBarBaz``), delimited by
    regex word boundaries. Candidates equal to one of the excluded keywords
    are skipped.

    Args:
        data: The string to scan.

    Returns:
        The first acceptable word, or ``None`` if there is none.
    """
    # Keywords that must never be returned as the result.
    unwanted_words = {'generations', 'text', 'message', 'content', 'run', 'n', 'my'}
    # Candidates consist only of letters, so exact set membership is
    # equivalent to searching each one for a whole-word keyword match.
    for found in re.finditer(r"\b[a-z]+(?:[A-Z][a-z]+)*\b", data):
        word = found.group(0)
        if word not in unwanted_words:
            # Only the first acceptable word is needed, so stop scanning here.
            return word
    return None