import difflib
def stri_similar(s1, s2):
    """Return a fast similarity estimate between two sequences.

    Args:
        s1: First sequence (typically a string).
        s2: Second sequence (typically a string).

    Returns:
        The value of ``difflib.SequenceMatcher.quick_ratio()`` for the pair,
        a float in ``[0.0, 1.0]``. difflib documents this as an upper bound
        on ``ratio()``, so element order is not taken into account.
    """
    # No junk predicate (first argument None): every element is compared.
    matcher = difflib.SequenceMatcher(None, s1, s2)
    return matcher.quick_ratio()
# Sample strings for trying out stri_similar (see the disabled demo below).
data1 = '你好啊'
data2 = '你好'
# for i in range(len(data1)):
# s1 = data1[i]
# s2 = data2[i]
# print(stri_similar(s1,s2) )
# print(stri_similar(data1,data2) )
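# Similarity comparison strategies
# Sequential comparison: compare the two lists pairwise, in order. Pro: low computational cost; con: lower similarity accuracy.
# Traversal comparison: compare every segment of one list against every segment of the other. Con: high computational cost; pro: more accurate similarity.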
# Sample segment lists: `d` holds three text segments and `text` holds the
# same segments with some characters dropped from the first two.
d = ['長短搭配,', '盡量減少接頭,', '以節約鋼材。']
text = ['搭配,', '盡少接頭,', '以節約鋼材。']
def contrast(text, d):
    """Return the average pairwise similarity of two lists of segments.

    Segments are compared position by position (``text[i]`` with ``d[i]``).
    For each pair, the longer segment is scanned and the fraction of its
    characters that occur anywhere in the shorter segment is that pair's
    score. The result is the mean score over ``len(text)`` pairs.

    Args:
        text: List of string segments to evaluate.
        d: List of string segments to compare against. It may be longer
            than ``text``; the extra trailing segments are ignored.

    Returns:
        A float in ``[0.0, 1.0]``, or ``None`` (after printing an error
        message) when ``text`` has more segments than ``d``. An empty
        ``text`` yields ``0.0``.
    """
    # Explicit length check instead of a bare ``except``: only the real
    # "lists do not line up" case is reported, and unrelated errors are no
    # longer swallowed and mislabelled.
    if len(text) > len(d):
        print('報錯:列表長度不一樣')
        return None

    if not text:
        # Nothing to compare; avoids dividing by zero below.
        return 0.0

    total = 0.0
    for candidate, reference in zip(text, d):
        # Scan the longer segment; on equal length the segment from ``d``
        # plays that role.
        if len(candidate) > len(reference):
            longer, shorter = candidate, reference
        else:
            longer, shorter = reference, candidate

        if not longer:
            # Both segments are empty: treat them as identical rather
            # than dividing by zero.
            total += 1.0
            continue

        hits = sum(1 for ch in longer if ch in shorter)
        total += hits / len(longer)

    return total / len(text)
# contrast(text,d)
def contrast2(list1, list2):
    """Print an all-pairs similarity summary for two lists of segments.

    Every segment of ``list1`` is compared with every segment of ``list2``
    via ``contrast`` and the pair scores are summed. The sum and the sum
    divided by ``len(list1)`` are printed; nothing is returned.

    NOTE: because every pair contributes to the sum, the printed percentage
    is not capped at 100%.

    Args:
        list1: List of string segments; its length is the divisor.
        list2: List of string segments compared against each ``list1`` item.
    """
    total = 0
    for segment in list1:
        # Same argument order as before: the list2 segment goes first.
        total += sum(contrast([other], [segment]) for other in list2)

    # An empty list1 previously raised ZeroDivisionError; report 0 instead.
    ratio = total / len(list1) if list1 else 0.0
    print('總量:{:.2f} 相似度:{:.2%}'.format(total, ratio))
# Demo: run the all-pairs comparison on two sample lists. This executes and
# prints whenever the module is run or imported.
list1 = ['長短搭配,', '盡量減少接頭,', '以節約鋼材。']
list2 = ['長短搭配,','盡量減少接頭,']
contrast2(list1,list2)
# contrast(list1,list2)