-
Notifications
You must be signed in to change notification settings - Fork 1
/
incomplete_phrases.py
executable file
·64 lines (59 loc) · 1.68 KB
/
incomplete_phrases.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/usr/bin/env python3
def phrase_incomplete(phrase):
    """Report whether a phrase holds more than one self-headed word.

    Each item of ``phrase`` is a whitespace-separated string of tags.  Tags
    starting with ``#`` are parsed as ``#SELF->HEAD`` and collected as
    ``(self index, head index)`` pairs; every other tag is ignored.

    Args:
        phrase: Sequence of tag strings, one per word.

    Returns:
        The string ``'FIRST-LAST'`` (self indices of the first and last
        collected pairs) when more than one pair has a head inside that
        span that equals its own index; otherwise ``False``.  A phrase of
        exactly one item, an empty phrase, and a phrase without any ``#``
        tag all yield ``False``.

    Raises:
        ValueError: If a ``#`` tag is not of the form ``#INT->INT``.
    """
    if len(phrase) == 1:
        return False
    links = []
    for word in phrase:
        for tag in word.split():
            if tag.startswith('#'):
                slf, hd = tag[1:].split('->')
                links.append((int(slf), int(hd)))
    # Nothing to inspect (empty phrase, or no '#' tags at all); without this
    # guard the indexing below raises IndexError.
    if not links:
        return False
    minwd = links[0][0]
    maxwd = links[-1][0]
    # Only words whose head lies inside the span AND equals their own index
    # are counted.  Words headed outside the span are deliberately left out
    # of the threshold (a variant that also counted them was disabled).
    selfhd = sum(1 for s, h in links if minwd <= h <= maxwd and h == s)
    if selfhd > 1:
        return f'{minwd}-{maxwd}'
    return False
def sentence_incomplete(sent, bd):
    """Return the span of the first incomplete phrase found in ``sent``.

    ``sent`` is split into lines, which are grouped into phrases:

    * a line starting with ``;`` closes the current phrase when it contains
      ``bd`` and the phrase already has at least one entry; the ``;`` line
      itself is never stored;
    * a line starting with a tab is stripped and appended (space-separated)
      to the most recent entry of the current phrase;
    * any other non-empty line is stripped and starts a new entry;
    * empty lines are skipped.

    Entries containing ``@punct`` are removed before each phrase is handed
    to ``phrase_incomplete``.

    Args:
        sent: Multi-line text of one sentence.
        bd: Substring that marks a phrase boundary on ``;`` lines.

    Returns:
        The first truthy value returned by ``phrase_incomplete`` for any
        phrase, or ``False`` when no phrase produces one.

    Raises:
        IndexError: If a tab-prefixed line occurs while the current phrase
            has no entry to attach it to.
    """
    phrases = []
    cur = []
    for ln in sent.splitlines():
        if not ln:
            # Blank lines carry no tags; indexing ln[0] on them would raise
            # IndexError.
            continue
        if ln.startswith(';'):
            if cur and bd in ln:
                phrases.append(cur)
                cur = []
        elif ln.startswith('\t'):
            cur[-1] += ' ' + ln.strip()
        else:
            cur.append(ln.strip())
    # Whatever is left after the last boundary forms the final phrase.
    if cur:
        phrases.append(cur)
    for entries in phrases:
        r = phrase_incomplete([e for e in entries if '@punct' not in e])
        if r:
            return r
    return False
import argparse
# Command-line driver: scan temp/parsed-cg3/<book>.txt and print, one per
# line, "<hit number>\t<sentence number>\t<span>" for sentences in which
# sentence_incomplete() reports a span.  Output begins at hit number --start
# and the scan stops once hit number --start + 10 is reached.
parser = argparse.ArgumentParser()
parser.add_argument('book')
parser.add_argument('-s', '--start', type=int, default=1)
parser.add_argument('-l', '--level', choices=['pb', 'cb', 'sb'], default='pb')
args = parser.parse_args()

with open(f'temp/parsed-cg3/{args.book}.txt') as fin:
    # Sentences are separated by one blank line in the input file.
    sents = fin.read().strip().split('\n\n')

n = 0  # running count of sentences with a reported span
for i, s in enumerate(sents, 1):
    r = sentence_incomplete(s, args.level)
    if not r:
        continue
    n += 1
    if n == args.start + 10:
        break
    if n >= args.start:
        print(f'{n}\t{i}\t{r}')