# Single-character splitting helpers
def division(text,strip):
    """Split ``text`` after every delimiter, keeping the delimiter.

    Each character of ``text`` that is contained in ``strip`` terminates the
    current piece and stays attached to the end of that piece. A trailing
    piece with no closing delimiter is returned as well; empty pieces are
    never produced.

    Args:
        text: The string to split, scanned one character at a time.
        strip: The delimiter character(s); tested with ``in``.

    Returns:
        A list of the pieces in their original order.
    """
    # Banner printed on every call (kept verbatim from the original).
    print('# 保留分割符 分隔符后換行')
    pieces = []
    pending = []
    for ch in text:
        pending.append(ch)
        if ch in strip:
            # The delimiter closes the current piece and belongs to it.
            pieces.append(''.join(pending))
            pending = []
    tail = ''.join(pending)
    if tail != '':
        pieces.append(tail)
    return pieces
def independent(text,strip):
    """Split ``text`` at delimiters, returning each delimiter as its own item.

    Text between delimiters becomes one item; every delimiter character
    becomes a separate single-character item. Empty text runs are skipped.

    The delimiter test uses ``in`` (as the sibling ``division`` function
    does), so ``strip`` may be one character or a collection of delimiter
    characters. For a single-character ``strip`` the result is the same as
    with an equality test.

    Args:
        text: The string to split, scanned one character at a time.
        strip: The delimiter character(s).

    Returns:
        A list of text runs and delimiters in their original order.
    """
    # Banner printed on every call (kept verbatim from the original).
    print('# 保留分割符 分隔符單獨')
    pieces = []
    pending = []
    for ch in text:
        if ch in strip:
            # Flush the text collected so far, then emit the delimiter alone.
            if pending:
                pieces.append(''.join(pending))
                pending = []
            pieces.append(ch)
        else:
            pending.append(ch)
    if pending:
        pieces.append(''.join(pending))
    return pieces
def independent1(text,strip):
    """Split ``text`` at delimiters and discard the delimiters.

    Consecutive, leading, or trailing delimiters do not produce empty items.

    The delimiter test uses ``in`` (as the sibling ``division`` function
    does), so ``strip`` may be one character or a collection of delimiter
    characters. For a single-character ``strip`` the result is the same as
    with an equality test.

    Args:
        text: The string to split, scanned one character at a time.
        strip: The delimiter character(s).

    Returns:
        A list of the non-empty text runs between delimiters.
    """
    # Banner printed on every call (kept verbatim from the original).
    print('# 不保留分隔符 分隔符處換行')
    pieces = []
    pending = []
    for ch in text:
        if ch in strip:
            # A delimiter ends the current run; the delimiter itself is dropped.
            if pending:
                pieces.append(''.join(pending))
                pending = []
        else:
            pending.append(ch)
    if pending:
        pieces.append(''.join(pending))
    return pieces
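# Modes that keep the delimiter: break after the delimiter / delimiter as its own item
# Mode that drops the delimiter: break at the delimiter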
# Read the whole input file; the context manager closes the handle as soon as
# the content has been read instead of leaving it to the garbage collector.
with open('text.txt', 'r', encoding='utf-8') as _source:
    text = _source.read()
strip = '\n'
# Split into lines, dropping the newline delimiters and empty lines.
d = independent1(text,strip)
print(len(d))
for i in d:
    # Print only lines where the marker occurs within the first 8 characters.
    if '工程概況' in i[0:8]:
        print(i)
# Purpose: split text into paragraphs/sections.
# Limitation (original author's note): matching must start at the first
# character of the marker.
class division():
    """Split text into chunks that each start at a chapter/section heading.

    A heading is the three-character sequence '第', one numeric character,
    then '章' or '節'. Only a single numeric character between '第' and the
    unit character is recognised.

    NOTE(review): this class rebinds the module-level name ``division``,
    which earlier in this file refers to a function; once this definition
    has run, that function is no longer reachable under this name.
    """

    # Multi-character split: unit characters that may end a heading marker.
    _UNITS = ('章', '節')

    def __init__(self, text):
        """Store the text to be split.

        Args:
            text: The text; ``start`` joins it with ``''.join`` before
                scanning, so a string or an iterable of strings works there.
        """
        self.text = text

    @staticmethod
    def _is_heading(text, i, t):
        """Return True when a heading marker starts at index ``i`` of ``text``."""
        # Two more characters are needed after '第'; near the end of the text
        # there is no room for a marker, so report "no match" instead of
        # indexing out of range.
        if t != '第' or i + 2 >= len(text):
            return False
        if text[i + 1].isnumeric() and text[i + 2] in division._UNITS:
            print('滿足', t, text[i + 1], text[i + 2])
            return True
        return False

    def cutting(self, i, t):
        """Tell whether a heading marker starts at position ``i`` of ``self.text``.

        Args:
            i: Index into ``self.text``.
            t: The element at that index.

        Returns:
            True for a match (a line starting with '滿足' is printed),
            otherwise False.
        """
        return self._is_heading(self.text, i, t)

    def start(self):
        """Split the stored text at every heading marker.

        Returns:
            A list of non-empty chunks in order. Text before the first
            heading (if any) is the first chunk; every later chunk begins
            with its heading marker.
        """
        # Scan the joined text with the same indices that are used for the
        # lookahead, so positions agree even if self.text is a list of strings.
        text = ''.join(self.text)
        segments = []
        begin = 0
        for i, t in enumerate(text):
            if self._is_heading(text, i, t):
                # Close the chunk collected so far; skip it when empty
                # (e.g. the text starts with a heading).
                if i > begin:
                    segments.append(text[begin:i])
                begin = i
        if begin < len(text):
            segments.append(text[begin:])
        return segments