carlosmendeznlp

Set up repository

Showing 126 changed files with 2174 additions and 0 deletions
1 +How to run
2 +
3 +$ bash $ISIMP/simplify.sh -json [INPUT] [OUTPUT]
4 +
5 +You can place files anywhere and run the above command. The INPUT file should contain plain text. The script will simplify the text and output the simplification constructs (with location information) in JSON format. For other usage, please try:
6 +
7 +$ bash $ISIMP/simplify.sh -help
8 +usage: Console [OPTIONS] [INPUT] [OUTPUT]
9 + Tag the POS, parse the sentences, and detect simplification
10 + constructs in the sentences.
11 + By default, assume the document is not tokenized and
12 + sentence-splited. Therefore, these two tasks will be done
13 + first.
14 + -help display this help and exit
15 + -json print file in JSON format. If not set, print file in plain
16 + text format
17 + -parse_only If set, parse the document only
18 + -tokenized set input tokenized. If not set, assume the document is not
19 + tokenized and ssplited.
20 +
21 +
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
This file is too large to display.
No preview for this file type
1 +// (VP) (CC) (VP NP) --> (VP CC VP) NP
2 +tregex: VP=p <1 (VP=c1 <: VBN|VBZ|VBD|VBP|VBG=cc1) <2 CC|CONJP <3 (VP=v2 <1 ~cc1=cc2 <2 NP|S=others) <- =v2
3 +operation: move others $- p
4 +
5 +// (VP) (CC) (VP NP) (...) --> (VP CC VP) NP (...)
6 +tregex: VP=p <1 (VP=c1 <: VBN|VBZ|VBD|VBP|VBG=cc1) <2 CC|CONJP=cc <3 (VP=v2 <1 ~cc1=cc2 <2 NP|S=others) !<- =v2
7 +operation: move cc $- cc1, move cc2 $- cc, move c1 $+ others
8 +
9 +tregex: VP=p <1 (VBN|VBZ|VBD|VBP|VBG=cc1) <2 CC|CONJP <3 (~cc1=c2) <4 NP=others <- =others
10 +operation: move others $- p
11 +
12 +//tregex: NP=p <1 NNS=nns <2 /,/ <3 NN=n1 <4 CC=cc <5 NN=n2
13 +//operation: adjoinF (NP @) n1, move cc $- n1, move n2 $- cc
14 +
15 +tregex: VP=p < (VBN|VBZ|VBD|VBP|VBG=cc1 $+ (CC|CONJP $+ (~cc1=c2)) !>1 =p)
16 +operation: move others $- p
1 +// apposition
2 +// 33
3 +tregex: NP=p <1 (@/N.*/=np1 !<< /,/ !<<, DT) <2 /,/ <3 (@/N.*/=np2 !<< /,/ <<, DT|CD) !< CC <- =np2
4 +// 22
5 +tregex: NP=p <1 (@/N.*/=np1 !<< /,/ !<<, DT) <2 /,/ <3 (@/N.*/=np2 !<< /,/ <<, DT) !< CC <- /,/
6 +// 17
7 +tregex: NP=p <1 (@/N.*/=np1 !<< /,/ <<, DT) <2 /,/ <3 (@/N.*/=np2 !<< /,/ !<< DT) !< CC <- =np2
8 +// 21
9 +tregex: NP=p <1 (@/N.*/=np1 !<< /,/ <<, DT) <2 /,/ <3 (@/N.*/=np2 !<< /,/ !<< DT) !< CC <- /,/
10 +// 33
11 +tregex: NP=p < (NP=np1 $++ (PP <1 (VBG < /including/) <2 NP=np2))
12 +// 4
13 +tregex: NP=p <1 (NP=np1 <1 CD) <2 /,/ <3 NP=np2 <4 /,/=end <- =end
14 +// 3
15 +tregex: NP=p <1 (NP=np1 << CD <2 (PP <<, /of/)) <2 /,/ <3 NP=np2
16 +
17 +// NP(DT ...) NP(NN)
18 +// 8
19 +tregex: NP=p <1 (NP=np1 << DT) <2 (NP=np2 <: NN)
20 +
21 +// the NP , a NP
22 +tregex: NP=p <1 (NP=np1 <<, /the/) <2 /,/ <3 (NP=np2 <<, /^(a|an)_/)
23 +
24 +// DT ... NNS NP(no CC)
25 +// 4
26 +tregex: NP=p <1 DT|JJ <-1 (NP=np2 !< CC !< /,/ $- NNS)
27 +
28 +// DT NP(... NNS) NP
29 +// 0
30 +tregex: NP=p <1 DT|JJ <2 (NP <- NNS) <3 (NP=np2 !< CC !< /,/) <- =np2
31 +
32 +// NP : NP
33 +// 16
34 +tregex: NP=p <1 NP=np1 <2 /:/ <3 (NP=np2 !<< CC !<< /,/) <- =np2
35 +
36 +// NNS , NN CC NN
1 +// apposition
2 +// 33
3 +tregex: NP=p <1 (@/N.*/=np1 !<< /,/ !<<, DT) <2 /,/ <3 (@/N.*/=np2 !<< /,/ <<, DT|CD) !< CC <- =np2
4 +// 22
5 +tregex: NP=p <1 (@/N.*/=np1 !<< /,/ !<<, DT) <2 /,/ <3 (@/N.*/=np2 !<< /,/ <<, DT) !< CC <- /,/
6 +// 17
7 +tregex: NP=p <1 (@/N.*/=np1 !<< /,/ <<, DT) <2 /,/ <3 (@/N.*/=np2 !<< /,/ !<< DT) !< CC <- =np2
8 +// 21
9 +tregex: NP=p <1 (@/N.*/=np1 !<< /,/ <<, DT) <2 /,/ <3 (@/N.*/=np2 !<< /,/ !<< DT) !< CC <- /,/
10 +// 33
11 +tregex: NP=p < (NP=np1 $++ (PP <1 (VBG < /including/) <2 NP=np2))
12 +// 4
13 +tregex: NP=p <1 (NP=np1 <1 CD) <2 /,/ <3 NP=np2 <4 /,/=end <- =end
14 +// 3
15 +tregex: NP=p <1 (NP=np1 << CD <2 (PP <<, /of/)) <2 /,/ <3 NP=np2
16 +operation: replace p np1
17 +operation: replace p np2
18 +
19 +// NP(DT ...) NP(NN)
20 +// 8
21 +tregex: NP=p <1 (NP=np1 << DT) <2 (NP=np2 <: NN)
22 +operation: replace p np1
23 +operation: replace p np2
24 +
25 +// the NP , a NP
26 +tregex: NP=p <1 (NP=np1 <<, /the/) <2 /,/ <3 (NP=np2 <<, /^(a|an)_/)
27 +operation: replace p np1
28 +operation: replace p np2
29 +
30 +// DT ... NNS NP(no CC)
31 +// 4
32 +tregex: NP=p <1 DT|JJ <-1 (NP=np2 !< CC !< /,/ $- NNS)
33 +operation: replace p np2
34 +operation: prune np2
35 +
36 +// DT NP(... NNS) NP
37 +// 0
38 +tregex: NP=p <1 DT|JJ <2 (NP <- NNS) <3 (NP=np2 !< CC !< /,/) <- =np2
39 +operation: replace p np2
40 +operation: prune np2
41 +
42 +// NP : NP
43 +// 16
44 +tregex: NP=p <1 NP=np1 <2 /:/ <3 (NP=np2 !<< CC !<< /,/) <- =np2
45 +operation: replace p np1
46 +operation: replace p np2
1 +// vbn coordination
2 +tregex: VP=p < (VBN|VBZ|VBD|VBP|VBG|VB=cc1 $+ (CC|CONJP=cc $+ (~cc1=cc2)) !>1 =p)
3 +operation: adjoinF (VP @) cc1, move cc $- cc1, move cc2 $- cc
4 +
5 +tregex: VP=p < (VBN|VBZ|VBD|VBP|VBG|VB=cc1 $+ (CC|CONJP=cc $+ (~cc1=cc2 $+ __)))
6 +operation: adjoinF (VP @) cc1, move cc $- cc1, move cc2 $- cc
7 +
8 +// np , (np ... cc np)
9 +tregex: NP=np <: NN|NNS=n1 $+ (/,/=comma $+ (NP <1 (NN|NNS=n2 $++ (CC [$+ NN|NNS | $+ (NP <: NN|NNS)] ))))
10 +operation: move comma $+ n2, move n1 $+ comma, prune np
11 +
12 +// NN , NN
13 +tregex: NP=np <1 NN=nn1 <2 /,/ <3 NN=nn2 <- =nn2
14 +operation: excise np np
1 +// mrna
2 +tregex: NP=p < ((NP=c1 <<- /mRNA/=mrna) $+ (CC=cc $+ (NP=c2 <<, /protein|surface/=protein)))
3 +tregex: NP=p < ((NN=c2 <<: /surface/=protein) $+ (CC=cc $+ (NN=c1 <<: /mRNA/=mrna)))
4 +tregex: NP=p < ((NP=c1 <<- /protein|surface/=protein) $+ (CC=cc $+ (NP=c2 <<, /mRNA/=mrna)))
5 +
6 +// NP cc NP (mRNA)
7 +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 CC <3 (NP=c2 <1 __=nn <2 (NN <<: /mRNA/))
8 +
9 +// NP , NP cc NP (mRNA)
10 +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 /,/ <3 (NP=c2 <: NN=nn2) <4 CC <5 (NP=c3 <1 __=nn <2 (NN <<: /mRNA/))
11 +
12 +// NP , NP , cc NP (mRNA)
13 +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 /,/ <3 (NP=c2 <: NN=nn2) <4 /,/ <5 CC <6 (NP=c3 <1 __=nn <2 (NN <<: /mRNA/))
14 +
15 +// NP , NP , NP cc NP (mRNA)
16 +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 /,/ <3 (NP=c2 <: NN=nn2) <4 /,/ <5 (NP=c3 <: NN=nn3) <6 CC <7 (NP=c4 <1 __=nn <2 (NN <<: /mRNA/))
17 +
18 +// NP , NP , NP , cc NP (mRNA)
19 +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 /,/ <3 (NP=c2 <: NN=nn2) <4 /,/ <5 (NP=c3 <: NN=nn3) <6 /,/ <7 CC <8 (NP=c4 <1 __=nn <2 (NN <<: /mRNA/))
20 +
21 +tregex: NP=n0 <1 (NP=c1 <1 (NN=n2) <- =n2) <2 (CC=n3) <3 (NP=c2 <1 (NN=n5) <- =n5) <- =c2
22 +
23 +tregex: ADJP=n0 <1 (ADJP=c1 <1 (JJ=n2) <- =n2) <2 (CC=n3) <3 (ADJP=c2 <1 (JJ=n5) <- =n5) <- =c2
24 +
25 +// 162
26 +tregex: NP=n0 <1 (CC=n1) <2 (NP=c1 <1 (NN=n3) <- =n3) <3 (CC=n4) <4 (NP=c2 <1 (NN=n6) <- =n6) <- =c2
27 +
28 +// 90
29 +tregex: ADJP=n0 <1 (CC=n1) <2 (ADJP=c1 <1 (JJ=n3) <- =n3) <3 (CC=n4) <4 (ADJP=c2 <1 (JJ=n6) <- =n6) <- =c2
30 +
31 +// 78
32 +tregex: NP=n0 <1 (NP=c1 <1 (NN=n2) <- =n2) <2 (/,/=n3) <3 (NP=c2 <1 (NN=n5) <- =n5) <4 (/,/=n6) <5 (CC=n7) <6 (NP=c3 <1 (NN=n9) <- =n9) <- =c3
33 +
34 +// CD CC CD
35 +tregex: QP=n0 <1 (CD=c1) <2 (CC=n2) <3 (CD=c2) <- =c2
36 +
37 +// PRP CC PRP
38 +tregex: NP=n0 <1 (PRP=c1) <2 (CC=n2) <3 (PRP=c2) <- =c2
39 +
40 +// DT NP:NN CC NP:NN
41 +tregex: NP=n0 <1 (DT=n1) <2 (NP=c1 <1 (NN=n3) <- =n3) <3 (CC=n4) <4 (NP=c2 <1 (NN=n6) <- =n6) <- =c2
42 +
43 +// NP:NNS CC NP:NNS
44 +tregex: NP=n0 <1 (NP=c1 <1 (NNS=n2) <- =n2) <2 (CC=n3) <3 (NP=c2 <1 (NNS=n5) <- =n5) <- =c2
45 +
46 +// NP:NN CC NP:NN
47 +tregex: NP=n0 <1 (NP=c1 <1 (NN=n2) <- =n2) <2 (CC=n3) <3 (NP=c2 <1 (NN=n5) <- =n5) <4 (NP=n6 <1 (NNS=n7) <- =n7) <- =n6
48 +
49 +// 30
50 +tregex: NP=n0 <1 (NP=n1 <1 (NN=n2) <- =n2) <2 (CC=n3) <3 (NP=n4 <1 (NN=c2) <2 (NNS=n6) <- =n6) <- =n4
51 +
52 +// 29
53 +tregex: NP=n0 <1 (NP=c1 <1 (NN=n2) <- =n2) <2 (CC=n3) <3 (NP=c2 <1 (JJ=n5) <2 (NNS=n6) <- =n6) <- =c2
54 +
55 +// 28
56 +tregex: NP=n0 <1 (NP=n1 <1 (NN=n2) <2 (NN=n3) <3 (NN=c1) <- =c1) <2 (CC=n5) <3 (NP=c2 <1 (NN=n7) <- =n7) <- =c2
57 +
58 +// 27
59 +tregex: NP=n0 <1 (NP=c1 <1 (NN=n2) <- =n2) <2 (CC=n3) <3 (NP=c2 <1 (NN=n5) <- =n5) <4 (NP=n6 <1 (NN=n7) <- =n7) <- =n6
60 +
61 +// 25
62 +tregex: NP=n0 <1 (NP=c1 <1 (CD=n2) <- =n2) <2 (CC=n3) <3 (NP=c2 <1 (CD=n5) <- =n5) <- =c2
63 +
64 +// 24
65 +tregex: NP=n0 <1 (NP=c1 <1 (NN=n2) <- =n2) <2 (/,/=n3) <3 (NP=c2 <1 (NN=n5) <- =n5) <4 (/,/=n6) <5 (NP=c3 <1 (NN=n8) <- =n8) <6 (/,/=n9) <7 (CC=n10) <8 (NP=c4 <1 (NN=n12) <- =n12) <- =c4
66 +
67 +// 23
68 +tregex: ADVP=n0 <1 (ADVP=c1 <1 (RB=n2) <- =n2) <2 (CC=n3) <3 (ADVP=c2 <1 (RB=n5) <- =n5) <- =c2
69 +
70 +// 20
71 +tregex: ADJP=n0 <1 (ADJP=c1 <1 (NN=n2) <- =n2) <2 (CC=n3) <3 (ADJP=c2 <1 (JJ=n5) <- =n5) <- =c2
72 +
73 +// 20
74 +tregex: NP=n0 <1 (NP=c1 <1 (NN=n2) <- =n2) <2 (/,/=n3) <3 (NP=c2 <1 (NN=n5) <- =n5) <4 (CC=n6) <5 (NP=c3 <1 (NN=n8) <- =n8) <- =c3
75 +
76 +// 21
77 +tregex: NP=n0 <1 (NP=c1 <1 (JJ=n2) <2 (NN=n3) <- =n3) <2 (CC=n4) <3 (NP=c2 <1 (JJ=n6) <2 (NN=n7) <- =n7) <- =c2
78 +
79 +// 17
80 +tregex: NP=n0 <1 (NN=c1) <2 (CC=n2) <3 (NN=c2) <4 (NNS=n4) <- =n4
81 +
82 +// 17
83 +tregex: NP=n0 <1 (NP=c1 <1 (NNP=n2) <- =n2) <2 (CC=n3) <3 (NP=c2 <1 (NNP=n5) <- =n5) <- =c2
84 +
85 +// 16
86 +tregex: NP=n0 <1 (NP=c1 <1 (JJ=n2) <2 (NN=n3) <3 (NN=n4) <- =n4) <2 (CC=n5) <3 (NP=c2 <1 (NN=n7) <- =n7) <- =c2
87 +
88 +// 16
89 +tregex: NP=n0 <1 (NP=c1 <1 (NN=n2) <2 (NN=n3) <3 (NN=n4) <- =n4) <2 (CC=n5) <3 (NP=c2 <1 (NN=n7) <2 (NN=n8) <3 (NN=n9) <- =n9) <- =c2
90 +
91 +// 15
92 +tregex: NP=n0 <1 (NNS=n1) <2 (CD=c1) <3 (CC=n3) <4 (CD=c2) <- =c2
93 +
94 +// 13
95 +tregex: NP=n0 <1 (NP=c1 <1 (NNS=n2) <- =n2) <2 (CC=n3) <3 (NP=c2 <1 (NN=n5) <2 (NNS=n6) <- =n6) <- =c2
96 +
97 +// 13
98 +tregex: NP=n0 <1 (NP=n1 <1 (NN=n2) <2 (NNS=c1) <- =c1) <2 (CC=n4) <3 (NP=n5 <1 (NNS=c2) <- =c2) <- =n5
99 +
100 +// 12
101 +tregex: NP=n0 <1 (CC=n1) <2 (NP=n2 <1 (JJ=n3) <2 (NN=c1) <- =c1) <3 (CC=n5) <4 (NP=n6 <1 (NN=c2) <- =c2) <- =n6
102 +
103 +// 11
104 +tregex: NP=n0 <1 (NP=c1 <1 (NN=n2) <- =n2) <2 (/,/=n3) <3 (NP=c2 <1 (NN=n5) <- =n5) <4 (/,/=n6) <5 (CC=n7) <6 (NP=n8 <1 (NN=c3) <2 (NNS=n10) <- =n10) <- =n8
105 +
106 +// 11
107 +tregex: NP=n0 <1 (NP=c1 <1 (JJ=n2) <2 (NNS=n3) <- =n3) <2 (CC=n4) <3 (NP=c2 <1 (JJ=n6) <2 (NNS=n7) <- =n7) <- =c2
108 +
109 +// 11
110 +tregex: NP=n0 <1 (NP=c1 <1 (NN=n2) <2 (NNS=n3) <- =n3) <2 (CC=n4) <3 (NP=c2 <1 (NN=n6) <2 (NNS=n7) <- =n7) <- =c2
111 +
112 +// 11
113 +tregex: NP=n0 <1 (NP=n1 <1 (JJ=n2) <2 (NNS=c1) <- =c1) <2 (CC=n4) <3 (NP=c2 <1 (NNS=n6) <- =n6) <- =c2
114 +
115 +// 10
116 +tregex: NP=n0 <1 (JJ=n1) <2 (NP=c1 <1 (NN=n3) <- =n3) <3 (CC=n4) <4 (NP=c2 <1 (NN=n6) <- =n6) <- =c2
117 +
118 +// 10
119 +tregex: NP=n0 <1 (CC=n1) <2 (NP=c1 <1 (NN=n3) <- =n3) <3 (CC=n4) <4 (NP=n5 <1 (JJ=c2) <2 (NNS=n7) <- =n7) <- =n5
120 +
121 +// 10
122 +tregex: NP=n0 <1 (DT=n1) <2 (NN=c1) <3 (CC=n3) <4 (NN=c2) <5 (NNS=n5) <- =n5
123 +
124 +// 10
125 +tregex: NP=n0 <1 (CC=n1) <2 (NP=c1 <1 (NN=n3) <- =n3) <3 (CC=n4) <4 (NP=n5 <1 (NN=c2) <2 (NNS=n7) <- =n7) <- =n5
126 +
127 +// 10
128 +tregex: NP=n0 <1 (DT=n1) <2 (NP=c1 <1 (NN=n3) <- =n3) <3 (CC=n4) <4 (NP=c2 <1 (NN=n6) <- =n6) <5 (NP=n7 <1 (NNS=n8) <- =n8) <- =n7
129 +
130 +// 10
131 +tregex: NP=n0 <1 (NP=c1 <1 (NN=n2) <- =n2) <2 (CC=n3) <3 (NP=c2 <1 (NN=n5) <- =n5) <4 (NP=n6 <1 (NN=n7) <2 (NNS=n8) <- =n8) <- =n6
132 +
133 +// 2 (DT NN CC NN)
134 +tregex: NP=p <1 /CC|DT|JJ|VBN|PRP\$/ <2 (/NP|NN/=c1) <3 CC|CONJP=cc <4 (/NP|NN/=c2) <- =c2
135 +tregex: NP=p <1 /CC|DT/ <2 JJ=c1 <3 CC=cc <4 JJ=c2
136 +// 2 NN CC NN NNS: 17
137 +tregex: NP=p <1 /NP|NN/=c1 <2 CC|CONJP=cc <3 (/NP|NN/=c2 !< PP) <4 __=end <- =end
138 +tregex: NP=p <1 CC <2 /NP|NN/=c1 <3 CC|CONJP=cc <4 (/NP|NN/=c2 !< PP) <5 __=end <- =end
139 +// 2 NN CC NN NN NN
140 +tregex: NP=p <1 /NP|NN/=c1 <2 CC|CONJP=cc <3 /NP|NN/=c2 <4 /NP|NN/ <5 /NP|NN/=end <- =end
141 +// 2 VBN NN CC NN NN: 12
142 +tregex: NP=p <1 /CC|DT|JJ|VBN|PRP\$/ <2 /NP|NN/=c1 <3 CC|CONJP=cc <4 /NP|NN/=c2 <5 /NP|NN/ <6 /NP|NN/=end <- =end
143 +// 2 DT NP CC NP NP
144 +tregex: NP=p <1 /CC|DT|JJ|VBN|PRP\$/ <2 (/NP|NN/=c1) <3 CC|CONJP=cc <4 (/NP|NN/=c2) <5 (/NP|NN/=end) <- =end
145 +// 2 NP CC NP NP: 92
146 +tregex: NP <1 (NP=c1 <: NN) <2 CC|CONJP=cc <3 (NP=c2 <: NN) <4 NP=end <- =end
147 +// 2 NN CD CC CD: 20
148 +tregex: NP <2 (CD=c1) <3 CC|CONJP=cc <4 (CD=c2) <- =c2
149 +
150 +// 2 CC DT NP CC NP NP
151 +tregex: NP <1 (CC=cc1) <2 (DT) <3 (NP=c1 <: (NN)) <4 (CC=cc2) <5 (NP=c2 <: (NN)) <6 (NP=end <: (NNS)) <- =end
152 +
153 +// 2 (JJ NN) CC NN
154 +tregex: NP=p <1 (NP <1 JJ=jj <2 /NN|NNS/=c1 <- =c1) <2 CC|CONJP <3 (NP=c2 <: /NN|NNS/=nn2) <- =c2
155 +// 2 NN NN CC NN: 129
156 +tregex: NP=p <1 (NP <1 NN=jj <2 /NN|NNS/=c1 <- =c1) <2 CC|CONJP <3 (NP=c2 <: /NN|NNS/=nn2) <- =c2
157 +
158 +// 2
159 +tregex: VP=p <1 (VP=c1 <: VBN|VBZ|VBD|VBP|VBG=cc1) <2 CC|CONJP <3 (VP=v2 <1 ~cc1=c2 <2 __=others) <- =v2
160 +tregex: VP=p <1 (VP=c1 <: VBN|VBZ|VBD|VBP|VBG=cc1) <2 CC|CONJP <3 (VP=v2 <1 ~cc1=c2)
161 +
162 +// 2 NP CC NP PP
163 +tregex: NP=p <1 (NP=c1 !< PP !<< /secretion/) <2 CC|CONJP=cc <3 (NP <1 NP=c2 <2 PP=pp <- =pp)
164 +tregex: NP=p <1 (NP=c1 !< PP !<< /secretion/) <2 /,/ <3 CC|CONJP=cc <4 (NP <1 NP=c2 <2 PP=pp <- =pp)
165 +tregex: NP=p <1 CC <2 (NP=c1 !< PP !<< /secretion/) <3 CC|CONJP=cc <4 (NP <1 NP=c2 <2 PP=pp <- =pp)
166 +
167 +// 2 NN CC (NN NNS): 151
168 +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 CC|CONJP <3 (NP=n2 <1 NN=c2 <2 NN|NNS=nns)
169 +// 2 CC NN CC (NN NNS): 30
170 +tregex: NP=p <1 (CC) <2 (NP=c1 <: /NN|NNS/=nn1) <3 CC|CONJP <4 (NP=n2 <1 /JJ|VBN|NN/=c2 <2 /NN|NNS/=nns) <- =n2
171 +
172 +// 2 JJ CC NP(NN NNS)
173 +tregex: NP=p <1 (NP <1 (ADJP=c1 <: (JJ=nn1))) <2 (CC) <3 (NP=n2 <1 (NN=c2) <2 (NNS=nns) <- =nns) <- =n2
174 +
175 +// 2 NP(NN NN NN) CC NP(NN)
176 +tregex: NP=p <1 (NP <1 (NN|DT|JJ) <2 (NN|JJ) <3 (NN=c1) <- =c1) <2 (CC) <3 (NP=c2 <: (NN=nn2)) <- =c2
177 +
178 +// 2 NP(JJ NN NN) CC NP(NN NN)
179 +tregex: NP=p <1 (NP=c1 <1 (JJ) <2 (NN=nn3) <3 (NN=nn1) <- =nn1) <2 (CC) <3 (NP=c2 <1 (NN=nn4) <2 (NN=nn2) <- =nn2) <- =c2
180 +
181 +// 2 ADVP PP , CC PP
182 +tregex: PP=p <1 ADVP <2 PP=c1 <3 /,/ <4 CC=cc <5 PP=c2 <- =c2
183 +tregex: NP=p <1 (NP=c1 !<< DT) <2 /,/ <3 (NP=c2 !<< DT) <- =c2
184 +tregex: VP=p <1 (VP=c1 << NP) <2 CC|CONJP <3 (VP=c2 <- NP)
185 +tregex: NP=p <1 (NP=c1 <: NN) <2 CC|CONJP <3 (NP=c2 <1 DT)
186 +//UCP=p <1 (ADJP=c1 <: (JJ)) <2 (CC) <3 (NP=c2 <: (NN)) <- =c2
187 +tregex: NP=p <1 (CC) <2 (NP=c1 <1 (DT) <2 (NN)) <3 (CC) <4 (NP=c2 <1 (DT) <2 (NN) <3 (NN)) <- =c2
188 +
189 +// 2 NP CC NP NP(NNS)
190 +tregex: NP=p <1 NP=c1 <2 CC|CONJP=cc <3 NP=c2 <4 (NP <- NNS)
191 +
192 +// 2 NP(NN) CC NP(JJ JJ NN)
193 +tregex: NP=p <1 (NP=c1 <: (NN=nn1)) <2 (CC) <3 (NP=c2 <1 (JJ=jj1) <2 (JJ=jj2) <3 (NN=nn2) <- =nn2) <- =c2
194 +
195 +// 2 NP(ADJP) CC NP(NN NNS)
196 +tregex: NP=p <1 (NP=c1 <: (ADJP <: (JJ=nn1))) <2 (CC) <3 (NP=n2 <1 (NN=c2) <2 (NNS=nns) <- =nns) <- =n2
197 +
198 +// 2 NN NN NP CC NP(NN)
199 +tregex: NP=p <1 (NN) <2 (NN) <3 (NP=c1 <1 (NN)) <4 (CC=cc) <5 (NP=c2 <: (NN)) <- =c2
200 +
201 +// 2 NN CC NN NN NN NN
202 +tregex: NP=p <1 (NN=c1) <2 (CC=cc) <3 (NN=c2) <4 (NN) <5 (NN) <6 (NN)
203 +
204 +// 3
205 +tregex: VP=p <1 (VP=c1 <: VBN|VBZ|VBD|VBP|VBG=cc1) <2 /,/ <3 (VP=c2 <: ~cc1=cc2) <4 /,/ <5 CC|CONJP <6 (VP=v2 <1 ~cc1=c3 <2 __=others) <- =v2
206 +
207 +//3 NN , NN , CC (NN NN): 19
208 +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 /,/ <3 (NP=c2 <: NN=nn2) <4 CC|CONJP <5 (NP <1 NN=c3 <2 NNS=nn4 <- =nn4)
209 +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 /,/ <3 (NP=c2 <: NN=nn2) <4 /,/ <5 CC|CONJP <6 (NP <1 NN=c3 <2 NNS=nn4 <- =nn4)
210 +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 /,/ <3 (NP=c2 <: NN=nn2) <4 CC|CONJP <5 (NP <1 NN=c3 <2 (NN=nn4 <<: /mRNA/) <- =nn4)
211 +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 /,/ <3 (NP=c2 <: NN=nn2) <4 /,/ <5 CC|CONJP <6 (NP <1 NN=c3 <2 (NN=nn4 <<: /mRNA/) <- =nn4)
212 +
213 +//3 NP , NP , CC NP NP: 5
214 +tregex: NP <1 (NP=c1 <: NN) <2 /,/=comma1 <3 (NP=c2 <: NN) <4 /,/=comma2 <5 CC|CONJP=cc <6 (NP=c3 <: NN) <7 NP=end <- =end
215 +
216 +//3 NN , NN , CC NN NN NNS: 2
217 +tregex: NP <1 NN=c1 <2 /,/=comma1 <3 NN=c2 <4 /,/=comma2 <5 CC|CONJP=cc <6 NN=c3 <7 NN <8 /NNS/=end <- =end
218 +
219 +// 3 NP(JJ NN) , NP , CC NP
220 +tregex: NP=p <1 (NP=c1 <1 (JJ) <2 (NN=nn1)) <2 (/,/) <3 (NP <1 (NN=nn2)) <4 (/,/) <5 (CC) <6 (NP=c3 <1 (NN=nn3)) <- =c3
221 +
222 +// 4
223 +tregex: NP=p <1 /NP|NN/=c1 <2 /,/=comma1 <3 /NP|NN/=c2 <4 /,/=comma2 <5 /NP|NN/=c3 <6 CC|CONJP=cc <7 /NP|NN/=c4 <8 /NP|NN/=end <- =end
224 +
225 +// 4
226 +tregex: NP=p <1 (NP=c1 <1 (NN) <2 (NN)) <2 (/,/) <3 (NP=c2 <1 (NN)) <4 (/,/) <5 (NP=c3 <1 (NN)) <6 (/,/) <7 (CC) <8 (NP=c4 <1 (NN)) <- =c4
227 +
228 +//4 NN , NN , NN , CC NN NN: 4
229 +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 /,/ <3 (NP=c2 <: NN=nn2) <4 /,/ <5 (NP=c3 <: NN=nn3) <6 CC|CONJP <7 (NP <1 NN=c4 <2 NNS=nn5 <- =nn5)
230 +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 /,/ <3 (NP=c2 <: NN=nn2) <4 /,/ <5 (NP=c3 <: NN=nn3) <6 /,/ <7 CC|CONJP <8 (NP <1 NN=c4 <2 NNS=nn5 <- =nn5)
231 +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 /,/ <3 (NP=c2 <: NN=nn2) <4 /,/ <5 (NP=c3 <: NN=nn3) <6 CC|CONJP <7 (NP <1 NN=c4 <2 (NN=nn5 <<: /mRNA/) <- =nn5)
232 +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 /,/ <3 (NP=c2 <: NN=nn2) <4 /,/ <5 (NP=c3 <: NN=nn3) <6 /,/ <7 CC|CONJP <8 (NP <1 NN=c4 <2 (NN=nn5 <<: /mRNA/) <- =nn5)
233 +
234 +tregex: (NP=n6 <1 (NP=c1) <2 (/,/=n9) <3 (NP=c2) <4 (/,/=n12) <5 (NP=c3) <6 (/,/=n15) <7 (NP=c4) <8 (/,/=n18) <9 (NP=c5) <10 (/,/=n21) <11 (NP=c6) <- =c6)
235 +
236 +tregex: (NP=n6 <1 (NP=c1) <2 (/,/=n9) <3 (NP=c2) <4 (/,/=n12) <5 (NP=c3) <6 (/,/=n15) <7 (NP=c4) <8 (/,/=n18) <9 (NP=c5) <- =c5)
237 +
238 +tregex: (NP=n6 <1 (NP=c1) <2 (/,/=n9) <3 (NP=c2) <4 (/,/=n12) <5 (NP=c3) <6 (/,/=n15) <7 (NP=c4) <- =c4)
239 +
240 +tregex: (NP=n6 <1 (NP=c1) <2 (/,/=n9) <3 (NP=c2) <4 (/,/=n12) <5 (NP=c3) <- =c3)
241 +
242 +//1 PP ; PP ; PP cc PP
243 +tregex: (PP=n6 <1 (PP=c1) <3 (PP=c2) <5 (PP=c3) <7 (PP=c4) <- =c4)
244 +
1 +// mrna
2 +tregex: NP=p < ((NP <<- /mRNA/=mrna) $+ (CC=cc $+ (NP <<, /protein|surface/=protein)))
3 +tregex: NP=p < ((NN <<: /surface/=protein) $+ (CC=cc $+ (NN <<: /mRNA/=mrna)))
4 +tregex: NP=p < ((NP <<- /protein|surface/=protein) $+ (CC=cc $+ (NP <<, /mRNA/=mrna)))
5 +operation: prune mrna cc
6 +operation: prune cc protein
7 +
8 +// NP cc NP (mRNA)
9 +tregex: NP=p <1 (NP <: NN=nn1) <2 CC <3 (NP=np2 <1 __=nn <2 (NN <<: /mRNA/))
10 +operation: replace p np2
11 +operation: replace nn nn1, replace p np2
12 +
13 +// NP , NP cc NP (mRNA)
14 +tregex: NP=p <1 (NP <: NN=nn1) <2 /,/ <3 (NP <: NN=nn2) <4 CC <5 (NP=np2 <1 __=nn <2 (NN <<: /mRNA/))
15 +operation: replace p np2
16 +operation: replace nn nn1, replace p np2
17 +operation: replace nn nn2, replace p np2
18 +
19 +// NP , NP , cc NP (mRNA)
20 +tregex: NP=p <1 (NP <: NN=nn1) <2 /,/ <3 (NP <: NN=nn2) <4 /,/ <5 CC <6 (NP=np2 <1 __=nn <2 (NN <<: /mRNA/))
21 +operation: replace p np2
22 +operation: replace nn nn1, replace p np2
23 +operation: replace nn nn2, replace p np2
24 +
25 +// NP , NP , NP cc NP (mRNA)
26 +tregex: NP=p <1 (NP <: NN=nn1) <2 /,/ <3 (NP <: NN=nn2) <4 /,/ <5 (NP <: NN=nn3) <6 CC <7 (NP=np2 <1 __=nn <2 (NN <<: /mRNA/))
27 +operation: replace p np2
28 +operation: replace nn nn1, replace p np2
29 +operation: replace nn nn2, replace p np2
30 +operation: replace nn nn3, replace p np2
31 +
32 +// NP , NP , NP , cc NP (mRNA)
33 +tregex: NP=p <1 (NP <: NN=nn1) <2 /,/ <3 (NP <: NN=nn2) <4 /,/ <5 (NP <: NN=nn3) <6 /,/ <7 CC <8 (NP=np2 <1 __=nn <2 (NN <<: /mRNA/))
34 +operation: replace p np2
35 +operation: replace nn nn1, replace p np2
36 +operation: replace nn nn2, replace p np2
37 +operation: replace nn nn3, replace p np2
38 +
39 +// 892
40 +tregex: NP=n0 <1 (NP=n1 <1 (NN=n2) <- =n2) <2 (CC=n3) <3 (NP=n4 <1 (NN=n5) <- =n5) <- =n4
41 +operation: replace n0 n1
42 +operation: replace n0 n4
43 +
44 +// 308
45 +tregex: ADJP=n0 <1 (ADJP=n1 <1 (JJ=n2) <- =n2) <2 (CC=n3) <3 (ADJP=n4 <1 (JJ=n5) <- =n5) <- =n4
46 +operation: replace n0 n1
47 +operation: replace n0 n4
48 +
49 +// 162
50 +tregex: NP=n0 <1 (CC=n1) <2 (NP=n2 <1 (NN=n3) <- =n3) <3 (CC=n4) <4 (NP=n5 <1 (NN=n6) <- =n6) <- =n5
51 +operation: replace n0 n2
52 +operation: replace n0 n5
53 +
54 +// 90
55 +tregex: ADJP=n0 <1 (CC=n1) <2 (ADJP=n2 <1 (JJ=n3) <- =n3) <3 (CC=n4) <4 (ADJP=n5 <1 (JJ=n6) <- =n6) <- =n5
56 +operation: replace n0 n2
57 +operation: replace n0 n5
58 +
59 +// 78
60 +tregex: NP=n0 <1 (NP=n1 <1 (NN=n2) <- =n2) <2 (/,/=n3) <3 (NP=n4 <1 (NN=n5) <- =n5) <4 (/,/=n6) <5 (CC=n7) <6 (NP=n8 <1 (NN=n9) <- =n9) <- =n8
61 +operation: replace n0 n1
62 +operation: replace n0 n4
63 +operation: replace n0 n8
64 +
65 +// 77
66 +tregex: QP=n0 <1 (CD=n1) <2 (CC=n2) <3 (CD=n3) <- =n3
67 +operation: replace n0 n1
68 +operation: replace n0 n3
69 +
70 +// 68
71 +tregex: NP=n0 <1 (DT=n1) <2 (NP=n2 <1 (NN=n3) <- =n3) <3 (CC=n4) <4 (NP=n5 <1 (NN=n6) <- =n6) <- =n5
72 +operation: replace n0 n2
73 +operation: replace n0 n5
74 +
75 +// 47
76 +tregex: NP=n0 <1 (NP=n1 <1 (NNS=n2) <- =n2) <2 (CC=n3) <3 (NP=n4 <1 (NNS=n5) <- =n5) <- =n4
77 +operation: replace n0 n1
78 +operation: replace n0 n4
79 +
80 +// 38
81 +tregex: NP=n0 <1 (NP=n1 <1 (NN=n2) <- =n2) <2 (CC=n3) <3 (NP=n4 <1 (NN=n5) <- =n5) <4 (NP=n6 <1 (NNS=n7) <- =n7) <- =n6
82 +operation: prune n1 n3
83 +operation: prune n3 n4
84 +
85 +// 30
86 +tregex: NP=n0 <1 (NP=n1 <1 (NN=n2) <- =n2) <2 (CC=n3) <3 (NP=n4 <1 (NN=n5) <2 (NNS=n6) <- =n6) <- =n4
87 +operation: replace n0 n4
88 +operation: replace n5 n2, replace n0 n4
89 +
90 +// 29
91 +tregex: NP=n0 <1 (NP=n1 <1 (NN=n2) <- =n2) <2 (CC=n3) <3 (NP=n4 <1 (JJ=n5) <2 (NNS=n6) <- =n6) <- =n4
92 +operation: replace n0 n4
93 +operation: replace n5 n2, replace n0 n4
94 +
95 +// 28
96 +tregex: NP=n0 <1 (NP=n1 <1 (NN=n2) <2 (NN=n3) <3 (NN=n4) <- =n4) <2 (CC=n5) <3 (NP=n6 <1 (NN=n7) <- =n7) <- =n6
97 +operation: replace n0 n1
98 +operation: replace n4 n7, replace n0 n1
99 +
100 +// 27
101 +tregex: NP=n0 <1 (NP=n1 <1 (NN=n2) <- =n2) <2 (CC=n3) <3 (NP=n4 <1 (NN=n5) <- =n5) <4 (NP=n6 <1 (NN=n7) <- =n7) <- =n6
102 +operation: prune n1 n3
103 +operation: prune n3 n4
104 +
105 +// 25
106 +tregex: NP=n0 <1 (NP=n1 <1 (CD=n2) <- =n2) <2 (CC=n3) <3 (NP=n4 <1 (CD=n5) <- =n5) <- =n4
107 +operation: replace n0 n1
108 +operation: replace n0 n4
109 +
110 +// 24
111 +tregex: NP=n0 <1 (NP=n1 <1 (NN=n2) <- =n2) <2 (/,/=n3) <3 (NP=n4 <1 (NN=n5) <- =n5) <4 (/,/=n6) <5 (NP=n7 <1 (NN=n8) <- =n8) <6 (/,/=n9) <7 (CC=n10) <8 (NP=n11 <1 (NN=n12) <- =n12) <- =n11
112 +operation: replace n0 n1
113 +operation: replace n0 n4
114 +operation: replace n0 n7
115 +operation: replace n0 n11
116 +
117 +// 23
118 +tregex: ADVP=n0 <1 (ADVP=n1 <1 (RB=n2) <- =n2) <2 (CC=n3) <3 (ADVP=n4 <1 (RB=n5) <- =n5) <- =n4
119 +operation: replace n0 n1
120 +operation: replace n0 n4
121 +
122 +// 20
123 +tregex: ADJP=n0 <1 (ADJP=n1 <1 (NN=n2) <- =n2) <2 (CC=n3) <3 (ADJP=n4 <1 (JJ=n5) <- =n5) <- =n4
124 +operation: replace n0 n1
125 +operation: replace n0 n4
126 +
127 +// 20
128 +tregex: NP=n0 <1 (NP=n1 <1 (NN=n2) <- =n2) <2 (/,/=n3) <3 (NP=n4 <1 (NN=n5) <- =n5) <4 (CC=n6) <5 (NP=n7 <1 (NN=n8) <- =n8) <- =n7
129 +operation: replace n0 n1
130 +operation: replace n0 n4
131 +operation: replace n0 n7
132 +
133 +// 21
134 +tregex: NP=n0 <1 (NP=n1 <1 (JJ=n2) <2 (NN=n3) <- =n3) <2 (CC=n4) <3 (NP=n5 <1 (JJ=n6) <2 (NN=n7) <- =n7) <- =n5
135 +operation: replace n0 n1
136 +operation: replace n0 n5
137 +
138 +// 17
139 +tregex: NP=n0 <1 (NN=n1) <2 (CC=n2) <3 (NN=n3) <4 (NNS=n4) <- =n4
140 +operation: prune n1 n2
141 +operation: prune n2 n3
142 +
143 +// 17
144 +tregex: NP=n0 <1 (NP=n1 <1 (NNP=n2) <- =n2) <2 (CC=n3) <3 (NP=n4 <1 (NNP=n5) <- =n5) <- =n4
145 +operation: replace n0 n1
146 +operation: replace n0 n4
147 +
148 +// 16
149 +tregex: NP=n0 <1 (NP=n1 <1 (JJ=n2) <2 (NN=n3) <3 (NN=n4) <- =n4) <2 (CC=n5) <3 (NP=n6 <1 (NN=n7) <- =n7) <- =n6
150 +operation: replace n0 n1
151 +operation: replace n4 n7, replace n0 n1
152 +
153 +// 16
154 +tregex: NP=n0 <1 (NP=n1 <1 (NN=n2) <2 (NN=n3) <3 (NN=n4) <- =n4) <2 (CC=n5) <3 (NP=n6 <1 (NN=n7) <2 (NN=n8) <3 (NN=n9) <- =n9) <- =n6
155 +operation: replace n0 n1
156 +operation: replace n0 n6
157 +
158 +// 15
159 +tregex: NP=n0 <1 (NNS=n1) <2 (CD=n2) <3 (CC=n3) <4 (CD=n4) <- =n4
160 +operation: prune n2 n3
161 +operation: prune n3 n4
162 +
163 +// 13
164 +tregex: NP=n0 <1 (NP=n1 <1 (NNS=n2) <- =n2) <2 (CC=n3) <3 (NP=n4 <1 (NN=n5) <2 (NNS=n6) <- =n6) <- =n4
165 +operation: replace n0 n1
166 +operation: replace n0 n4
167 +
168 +// 13
169 +tregex: NP=n0 <1 (NP=n1 <1 (NN=n2) <2 (NNS=n3) <- =n3) <2 (CC=n4) <3 (NP=n5 <1 (NNS=n6) <- =n6) <- =n5
170 +operation: replace n0 n1
171 +operation: replace n3 n6, replace n0 n1
172 +
173 +// 12
174 +tregex: NP=n0 <1 (CC=n1) <2 (NP=n2 <1 (JJ=n3) <2 (NN=n4) <- =n4) <3 (CC=n5) <4 (NP=n6 <1 (NN=n7) <- =n7) <- =n6
175 +operation: replace n0 n2
176 +operation: replace n4 n7, replace n0 n2
177 +
178 +// 11
179 +tregex: NP=n0 <1 (NP=n1 <1 (NN=n2) <- =n2) <2 (/,/=n3) <3 (NP=n4 <1 (NN=n5) <- =n5) <4 (/,/=n6) <5 (CC=n7) <6 (NP=n8 <1 (NN=n9) <2 (NNS=n10) <- =n10) <- =n8
180 +operation: replace n0 n8
181 +operation: replace n9 n2, replace n0 n8
182 +operation: replace n9 n5, replace n0 n8
183 +
184 +// 11
185 +tregex: NP=n0 <1 (NP=n1 <1 (JJ=n2) <2 (NNS=n3) <- =n3) <2 (CC=n4) <3 (NP=n5 <1 (JJ=n6) <2 (NNS=n7) <- =n7) <- =n5
186 +operation: replace n0 n1
187 +operation: replace n0 n5
188 +
189 +// 11
190 +tregex: NP=n0 <1 (NP=n1 <1 (NN=n2) <2 (NNS=n3) <- =n3) <2 (CC=n4) <3 (NP=n5 <1 (NN=n6) <2 (NNS=n7) <- =n7) <- =n5
191 +operation: replace n0 n1
192 +operation: replace n0 n5
193 +
194 +// 11
195 +tregex: NP=n0 <1 (NP=n1 <1 (JJ=n2) <2 (NNS=n3) <- =n3) <2 (CC=n4) <3 (NP=n5 <1 (NNS=n6) <- =n6) <- =n5
196 +operation: replace n0 n1
197 +operation: replace n0 n5
198 +
199 +// 10
200 +tregex: NP=n0 <1 (JJ=n1) <2 (NP=n2 <1 (NN=n3) <- =n3) <3 (CC=n4) <4 (NP=n5 <1 (NN=n6) <- =n6) <- =n5
201 +operation: prune n2 n4
202 +operation: prune n4 n5
203 +
204 +// 10
205 +tregex: NP=n0 <1 (CC=n1) <2 (NP=n2 <1 (NN=n3) <- =n3) <3 (CC=n4) <4 (NP=n5 <1 (JJ=n6) <2 (NNS=n7) <- =n7) <- =n5
206 +operation: replace n0 n5
207 +operation: replace n6 n3, replace n0 n5
208 +
209 +// 10
210 +tregex: NP=n0 <1 (DT=n1) <2 (NN=n2) <3 (CC=n3) <4 (NN=n4) <5 (NNS=n5) <- =n5
211 +operation: prune n2 n3
212 +operation: prune n3 n4
213 +
214 +// 10
215 +tregex: NP=n0 <1 (CC=n1) <2 (NP=n2 <1 (NN=n3) <- =n3) <3 (CC=n4) <4 (NP=n5 <1 (NN=n6) <2 (NNS=n7) <- =n7) <- =n5
216 +operation: replace n0 n5
217 +operation: replace n6 n3, replace n0 n5
218 +
219 +// 10
220 +tregex: NP=n0 <1 (DT=n1) <2 (NP=n2 <1 (NN=n3) <- =n3) <3 (CC=n4) <4 (NP=n5 <1 (NN=n6) <- =n6) <5 (NP=n7 <1 (NNS=n8) <- =n8) <- =n7
221 +operation: prune n2 n4
222 +operation: prune n4 n5
223 +
224 +// 10
225 +tregex: NP=n0 <1 (NP=n1 <1 (NN=n2) <- =n2) <2 (CC=n3) <3 (NP=n4 <1 (NN=n5) <- =n5) <4 (NP=n6 <1 (NN=n7) <2 (NNS=n8) <- =n8) <- =n6
226 +operation: prune n1 n3
227 +operation: prune n3 n4
228 +
229 +// 2 (DT NN CC NN)
230 +tregex: NP=p <1 /CC|DT|JJ|VBN|PRP\$/ <2 (/NP|NN/=c1) <3 CC|CONJP=cc <4 (/NP|NN/=c2) <- =c2
231 +tregex: NP=p <1 /CC|DT/ <2 JJ=c1 <3 CC=cc <4 JJ=c2
232 +// 2 NN CC NN NNS: 17
233 +tregex: NP=p <1 /NP|NN/=c1 <2 CC|CONJP=cc <3 (/NP|NN/=c2 !< PP) <4 __=end <- =end
234 +tregex: NP=p <1 CC <2 /NP|NN/=c1 <3 CC|CONJP=cc <4 (/NP|NN/=c2 !< PP) <5 __=end <- =end
235 +// 2 NN CC NN NN NN
236 +tregex: NP=p <1 /NP|NN/=c1 <2 CC|CONJP=cc <3 /NP|NN/=c2 <4 /NP|NN/ <5 /NP|NN/=end <- =end
237 +// 2 VBN NN CC NN NN: 12
238 +tregex: NP=p <1 /CC|DT|JJ|VBN|PRP\$/ <2 /NP|NN/=c1 <3 CC|CONJP=cc <4 /NP|NN/=c2 <5 /NP|NN/ <6 /NP|NN/=end <- =end
239 +// 2 DT NP CC NP NP
240 +tregex: NP=p <1 /CC|DT|JJ|VBN|PRP\$/ <2 (/NP|NN/=c1) <3 CC|CONJP=cc <4 (/NP|NN/=c2) <5 (/NP|NN/=end) <- =end
241 +// 2 NP CC NP NP: 92
242 +tregex: NP <1 (NP=c1 <: NN) <2 CC|CONJP=cc <3 (NP=c2 <: NN) <4 NP=end <- =end
243 +// 2 NN CD CC CD: 20
244 +tregex: NP <2 (CD=c1) <3 CC|CONJP=cc <4 (CD=c2) <- =c2
245 +operation: prune cc c2
246 +operation: prune cc c1
247 +
248 +// 2 CC DT NP CC NP NP
249 +tregex: NP <1 (CC=cc1) <2 (DT) <3 (NP=c1 <: (NN)) <4 (CC=cc2) <5 (NP=c2 <: (NN)) <6 (NP=end <: (NNS)) <- =end
250 +operation: prune cc1 cc2 c2
251 +operation: prune cc1 cc2 c1
252 +
253 +// 2 (JJ NN) CC NN
254 +tregex: NP=p <1 (NP=c1 <1 JJ=jj <2 /NN|NNS/=end <- =end) <2 CC|CONJP <3 (NP=c2 <: /NN|NNS/=nn2) <- =c2
255 +// 2 NN NN CC NN: 129
256 +tregex: NP=p <1 (NP=c1 <1 NN=jj <2 /NN|NNS/=end <- =end) <2 CC|CONJP <3 (NP=c2 <: /NN|NNS/=nn2) <- =c2
257 +operation: replace p c1
258 +operation: move jj $+ nn2, replace p c2
259 +
260 +// 2
261 +tregex: VP=p <1 (VP=c1 <: VBN|VBZ|VBD|VBP|VBG=cc1) <2 CC|CONJP <3 (VP=c2 <1 ~cc1 <2 __=others) <- =c2
262 +operation: replace p c2
263 +operation: move others $- cc1, replace p c1
264 +
265 +// 2 NP CC NP PP
266 +tregex: NP=p <1 (NP=c1 !< PP !<< /secretion/) <2 CC|CONJP=cc <3 (NP=c2 <2 PP=pp <- =pp)
267 +tregex: NP=p <1 (NP=c1 !< PP !<< /secretion/) <2 /,/ <3 CC|CONJP=cc <4 (NP=c2 <2 PP=pp <- =pp)
268 +tregex: NP=p <1 CC <2 (NP=c1 !< PP !<< /secretion/) <3 CC|CONJP=cc <4 (NP=c2 <2 PP=pp <- =pp)
269 +operation: replace p c2
270 +operation: prune cc, replace c2 pp
271 +
272 +// 2 NN CC (NN NNS): 151
273 +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 CC|CONJP <3 (NP=c2 <1 NN=nn2 <2 NN|NNS=nns)
274 +// 2 CC NN CC (NN NNS): 30
275 +tregex: NP=p <1 (CC) <2 (NP=c1 <: /NN|NNS/=nn1) <3 CC|CONJP <4 (NP=c2 <1 /JJ|VBN|NN/=nn2 <2 /NN|NNS/=nns) <- =c2
276 +operation: replace p c2
277 +operation: move nns $- nn1, replace p c1
278 +operation: replace p c1
279 +operation: replace p c2
280 +
281 +// 2 JJ CC NP(NN NNS)
282 +tregex: NP=p <1 (NP <1 (ADJP <: (JJ=nn1))) <2 (CC) <3 (NP=c2 <1 (NN=nn2) <2 (NNS=nns) <- =nns) <- =c2
283 +operation: replace p c2
284 +operation: replace nn2 nn1, replace p c2
285 +
286 +// 2 NP(NN NN NN) CC NP(NN)
287 +tregex: NP=p <1 (NP=c1 <1 (NN|DT|JJ) <2 (NN|JJ) <3 (NN=nn1) <- =nn1) <2 (CC) <3 (NP=c2 <: (NN=nn2)) <- =c2
288 +operation: replace p c1
289 +operation: replace nn1 nn2, replace p c1
290 +
291 +// 2 NP(JJ NN NN) CC NP(NN NN)
292 +tregex: NP=p <1 (NP=c1 <1 (JJ) <2 (NN=nn3) <3 (NN=nn1) <- =nn1) <2 (CC) <3 (NP=c2 <1 (NN=nn4) <2 (NN=nn2) <- =nn2) <- =c2
293 +operation: replace p c1
294 +operation: replace nn1 nn2, replace nn3 nn4, replace p c1
295 +
296 +// 2 ADVP PP , CC PP
297 +tregex: PP=p <1 ADVP <2 PP=c1 <3 /,/ <4 CC=cc <5 PP=c2 <- =c2
298 +tregex: NP=p <1 (NP=c1 !<< DT) <2 /,/ <3 (NP=c2 !<< DT) <- =c2
299 +tregex: VP=p <1 (VP=c1 << NP) <2 CC|CONJP <3 (VP=c2 <- NP)
300 +tregex: NP=p <1 (NP=c1 <: NN) <2 CC|CONJP <3 (NP=c2 <1 DT)
301 +//UCP=p <1 (ADJP=c1 <: (JJ)) <2 (CC) <3 (NP=c2 <: (NN)) <- =c2
302 +tregex: NP=p <1 (CC) <2 (NP=c1 <1 (DT) <2 (NN)) <3 (CC) <4 (NP=c2 <1 (DT) <2 (NN) <3 (NN)) <- =c2
303 +operation: replace p c1
304 +operation: replace p c2
305 +
306 +// 2 NP CC NP NP(NNS)
307 +tregex: NP=p <1 NP=c1 <2 CC|CONJP=cc <3 NP=c2 <4 (NP <- NNS)
308 +operation: prune c1 cc
309 +operation: prune cc c2
310 +
311 +// 2 NP(NN) CC NP(JJ JJ NN)
312 +tregex: NP=p <1 (NP=c1 <: (NN=nn1)) <2 (CC) <3 (NP=c2 <1 (JJ=jj1) <2 (JJ=jj2) <3 (NN=nn2) <- =nn2) <- =c2
313 +operation: replace p c2
314 +operation: prune jj1, replace jj2 nn1, replace p c2
315 +
316 +// 2 NP(ADJP) CC NP(NN NNS)
317 +tregex: NP=p <1 (NP=c1 <: (ADJP <: (JJ=nn1))) <2 (CC) <3 (NP=c2 <1 (NN=jj1) <2 (NNS=nns) <- =nns) <- =c2
318 +operation: replace p c2
319 +operation: replace jj1 nn1, replace p c2
320 +
321 +// 2 NN NN NP CC NP(NN)
322 +tregex: NP=p <1 (NN) <2 (NN) <3 (NP=c1 <1 (NN)) <4 (CC=cc) <5 (NP=c2 <: (NN)) <- =c2
323 +operation: prune c1 cc
324 +operation: prune cc c2
325 +
326 +// 2 NN CC NN NN NN NN
327 +tregex: NP=p <1 (NN=c1) <2 (CC=cc) <3 (NN=c2) <4 (NN) <5 (NN) <6 (NN)
328 +operation: prune c1 cc
329 +operation: prune cc c2
330 +
331 +// 2
332 +tregex: NP=n0 <1 (NP=n1 <: NN) <2 CC=cc <3 (NP=n2 <: NN) <4 NN <5 NN
333 +operation: prune n1 cc
334 +operation: prune cc n2
335 +
336 +// 2
337 +tregex: NP=n0 <1 (NP=n1 <: NN) <2 CC=cc <3 NN=n2 <4 (NN <<, /mRNA/) <5 NN
338 +operation: prune n1 cc
339 +operation: prune cc n2
340 +
341 +// 3
342 +tregex: VP=p <1 (VP=c1 <: VBN|VBZ|VBD|VBP|VBG=cc1) <2 /,/ <3 (VP=c2 <: ~cc1=cc2) <4 /,/ <5 CC|CONJP <6 (VP=c3 <1 ~cc1 <2 __=others) <- =c3
343 +operation: replace p c3
344 +operation: move others $- cc1, replace p c1
345 +operation: replace p c3
346 +operation: move others $- cc2, replace p c2
347 +
348 +//3 NN , NN , CC (NN NN): 19
349 +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 /,/ <3 (NP=c2 <: NN=nn2) <4 CC|CONJP <5 (NP=c3 <1 NN=nn3 <2 NNS=nn4 <- =nn4)
350 +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 /,/ <3 (NP=c2 <: NN=nn2) <4 /,/ <5 CC|CONJP <6 (NP=c3 <1 NN=nn3 <2 NNS=nn4 <- =nn4)
351 +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 /,/ <3 (NP=c2 <: NN=nn2) <4 CC|CONJP <5 (NP=c3 <1 NN=nn3 <2 (NN=nn4 <<: /mRNA/) <- =nn4)
352 +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 /,/ <3 (NP=c2 <: NN=nn2) <4 /,/ <5 CC|CONJP <6 (NP=c3 <1 NN=nn3 <2 (NN=nn4 <<: /mRNA/) <- =nn4)
353 +operation: replace p c3
354 +operation: move nn4 $- nn1, replace p c1
355 +operation: move nn4 $- nn2, replace p c2
356 +
357 +//3 NP , NP , CC NP NP: 5
358 +tregex: NP <1 (NP=c1 <: NN) <2 /,/=comma1 <3 (NP=c2 <: NN) <4 /,/=comma2 <5 CC|CONJP=cc <6 (NP=c3 <: NN) <7 NP=end <- =end
359 +operation: prune c2 cc c3 comma1 comma2
360 +operation: prune c1 cc c3 comma1 comma2
361 +operation: prune c1 c2 cc comma1 comma2
362 +
363 +//3 NN , NN , CC NN NN NNS: 2
364 +tregex: NP <1 NN=c1 <2 /,/=comma1 <3 NN=c2 <4 /,/=comma2 <5 CC|CONJP=cc <6 NN=c3 <7 NN <8 /NNS/=end <- =end
365 +operation: prune c2 cc c3 comma1 comma2
366 +operation: prune c1 cc c3 comma1 comma2
367 +operation: prune c1 c2 cc comma1 comma2
368 +
369 +// 3 NP(JJ NN) , NP , CC NP -- the trailing "<- =c3" requires the named conjunct c3 to be the last child of p
370 +tregex: NP=p <1 (NP=c1 <1 (JJ) <2 (NN=nn1)) <2 (/,/) <3 (NP <1 (NN=nn2)) <4 (/,/) <5 (CC) <6 (NP=c3 <1 (NN=nn3)) <- =c3
371 +operation: replace p c1
372 +operation: replace nn1 nn2, replace p c1
373 +operation: replace nn1 nn3, replace p c1
374 +
375 +// 4
376 +tregex: NP=p <1 /NP|NN/=c1 <2 /,/=comma1 <3 /NP|NN/=c2 <4 /,/=comma2 <5 /NP|NN/=c3 <6 CC|CONJP=cc <7 /NP|NN/=c4 <8 /NP|NN/=end <- =end
377 +operation: prune cc c1 c2 c3 comma1 comma2
378 +operation: prune cc c2 c3 c4 comma1 comma2
379 +operation: prune cc c3 c4 c1 comma1 comma2
380 +operation: prune cc c4 c1 c2 comma1 comma2
381 +
382 +// 4
383 +tregex: NP=p <1 (NP=c1 <1 (NN) <2 (NN)) <2 (/,/) <3 (NP=c2 <1 (NN)) <4 (/,/) <5 (NP=c3 <1 (NN)) <6 (/,/) <7 (CC) <8 (NP=c4 <1 (NN)) <- =c4
384 +operation: replace p c1
385 +operation: replace p c2
386 +operation: replace p c3
387 +operation: replace p c4
388 +
389 +//4 NN , NN , NN , CC NN NN: 4 -- one operation per conjunct; nn5 is paired with that conjunct's own noun (nn1/nn2/nn3)
390 +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 /,/ <3 (NP=c2 <: NN=nn2) <4 /,/ <5 (NP=c3 <: NN=nn3) <6 CC|CONJP <7 (NP=c4 <1 NN=nn4 <2 NNS=nn5 <- =nn5)
391 +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 /,/ <3 (NP=c2 <: NN=nn2) <4 /,/ <5 (NP=c3 <: NN=nn3) <6 /,/ <7 CC|CONJP <8 (NP=c4 <1 NN=nn4 <2 NNS=nn5 <- =nn5)
392 +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 /,/ <3 (NP=c2 <: NN=nn2) <4 /,/ <5 (NP=c3 <: NN=nn3) <6 CC|CONJP <7 (NP=c4 <1 NN=nn4 <2 (NN=nn5 <<: /mRNA/) <- =nn5)
393 +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 /,/ <3 (NP=c2 <: NN=nn2) <4 /,/ <5 (NP=c3 <: NN=nn3) <6 /,/ <7 CC|CONJP <8 (NP=c4 <1 NN=nn4 <2 (NN=nn5 <<: /mRNA/) <- =nn5)
394 +operation: replace p c4
395 +operation: move nn5 $- nn1, replace p c1
396 +operation: move nn5 $- nn2, replace p c2
397 +operation: move nn5 $- nn3, replace p c3
398 +
399 +tregex: (NP=n6 <1 (NP=n7) <2 (/,/=n9) <3 (NP=n10) <4 (/,/=n12) <5 (NP=n13) <6 (/,/=n15) <7 (NP=n16) <8 (/,/=n18) <9 (NP=n19) <10 (/,/=n21) <11 (NP=n22) <- =n22)
400 +operation: replace n6 n7
401 +operation: replace n6 n10
402 +operation: replace n6 n13
403 +operation: replace n6 n16
404 +operation: replace n6 n19
405 +operation: replace n6 n22
406 +
407 +tregex: (NP=n6 <1 (NP=n7) <2 (/,/=n9) <3 (NP=n10) <4 (/,/=n12) <5 (NP=n13) <6 (/,/=n15) <7 (NP=n16) <8 (/,/=n18) <9 (NP=n19) <- =n19)
408 +operation: replace n6 n7
409 +operation: replace n6 n10
410 +operation: replace n6 n13
411 +operation: replace n6 n16
412 +operation: replace n6 n19
413 +
414 +tregex: (NP=n6 <1 (NP=n7) <2 (/,/=n9) <3 (NP=n10) <4 (/,/=n12) <5 (NP=n13) <6 (/,/=n15) <7 (NP=n16) <- =n16)
415 +operation: replace n6 n7
416 +operation: replace n6 n10
417 +operation: replace n6 n13
418 +operation: replace n6 n16
419 +
420 +tregex: (NP=n6 <1 (NP=n7) <2 (/,/=n9) <3 (NP=n10) <4 (/,/=n12) <5 (NP=n13) <- =n13)
421 +operation: replace n6 n7
422 +operation: replace n6 n10
423 +operation: replace n6 n13
424 +
425 +//1 PP ; PP ; PP cc PP
426 +tregex: (PP=n6 <1 (PP=n7) <3 (PP=n10) <5 (PP=n12) <7 (PP=n13) <- =n13)
427 +operation: replace n6 n7
428 +operation: replace n6 n10
429 +operation: replace n6 n12
430 +operation: replace n6 n13
1 +// identified NP as NP
2 +tregex: __=p < (VP <1 (/^VB.?$/ <<: /^(identif)(y|ies|ied|ying)|(recogniz|determin)(e|es|ed|ing)?|(regard|consider|view|treat)(s|ed|ing)?|(think|thought)|(map)(s|ped|ping)?|(reveal)(s|ed|ing)?|(disclos)(e|es|ed|ing)?$/) <2 (NP=hype !< IN) <3 (PP <1 (IN <<: /as/) <2 NP=hypo))
3 +tregex: __=p < (VP <1 (/^VB.?$/ <<: /^(identif)(y|ies|ied|ying)|(recogniz|determin)(e|es|ed|ing)?|(regard|consider|view|treat)(s|ed|ing)?|(think|thought)|(map)(s|ped|ping)?|(reveal)(s|ed|ing)?|(disclos)(e|es|ed|ing)?$/) <2 (NP <1 (NP=hype !< IN) <2 (PP <1 (IN <<: /as/) <2 NP=hypo)))
4 +
5 +// identified NP , as NP
6 +tregex: __=p < (VP <1 (/^VB.?$/ <<: /^(identif)(y|ies|ied|ying)|(recogniz|determin)(e|es|ed|ing)?|(regard|consider|view|treat)(s|ed|ing)?|(think|thought)|(map)(s|ped|ping)?|(reveal)(s|ed|ing)?|(disclos)(e|es|ed|ing)?$/) <2 (NP=hype !< IN) <3 /,/ <4 (PP <1 (IN <<: /as/) <2 NP=hypo))
7 +
8 +// NP was identified as NP
9 +tregex: S=p < (NP=hype $+ (VP <1 (/^VB.*$/ <<: /is|was|were|are/) <2 (VP <1 (/^VB.?$/ <<: /^identified|recognized|regarded|thought|mapped|revealed|known|disclosed$/) <2 (PP <1 (IN <<: /as/) <2 NP=hypo))))
10 +tregex: S=p < (NP=hype $+ (VP <1 (/^VB.*$/ <<: /is|was|were|are/) <2 (VP <1 (/^VB.?$/ <<: /^identified|recognized|regarded|thought|mapped|revealed|known|disclosed$/) <2 (PP <1 (IN <<: /as/) <2 NP=hypo))))
11 +
12 +// NP has been identified as NP
13 +tregex: S=p < (NP=hype $+ (VP <1 (/^VB.*$/ <<: /has|had|have/) <2 (VP <1 (/^VB.*$/ <<: /been/) <2 (VP <1 (/^VB.?$/ <<: /^identified|recognized|regarded|thought|mapped|revealed|known|disclosed$/) <2 (PP <1 (IN <<: /as/) <2 NP=hypo)))))
14 +tregex: S=p < (NP=hype $+ (VP <1 (/^VB.*$/ <<: /has|had|have/) <2 ADVP <3 (VP <1 (/^VB.*$/ <<: /been/) <2 (VP <1 (/^VB.?$/ <<: /^identified|recognized|regarded|thought|mapped|revealed|known|disclosed$/) <2 (PP <1 (IN <<: /as/) <2 NP=hypo)))))
15 +
16 +// NP , termed NP
17 +tregex: NP=p <1 (NP=hype) <2 /,/ <3 (VP <1 (/^VBN$/ <<: /^termed$/) <2 NP=hypo)
18 +tregex: VP=p <1 (VP <<- NP=hype) <2 /,/ <3 (VP <1 (/^VBN$/ <<: /^termed$/) <2 NP=hypo)
19 +
20 +// NP , known as NP
21 +tregex: NP=p <1 (NP=hype) <2 /,/ <3 (VP <1 (/^VBN$/ <<: /^known$/) <2 (PP <1 (IN << /as/) <2 NP=hypo))
22 +tregex: NP=p <1 (NP=hype) <2 PP <3 /,/ <4 (VP <1 (/^VBN$/ <<: /^known$/) <2 (PP <1 (IN << /as/) <2 NP=hypo))
23 +
24 +// NP is a NP -- copula and article regexes are fully anchored; unanchored alternations also hit words such as "comprises" or "activation"
25 +tregex: S=p <1 NP=hype <2 (VP <1 (/^VB|VBZ|VBP$/ <<: /^(is|are)$/) <2 (NP=hypo <<, /^(a|an)$/))
26 +tregex: S=p <1 NP=hype <2 (VP <-1 (VP <1 (/^VB|VBZ|VBP$/ <<: /^(is|are|be|been)$/) <2 (NP=hypo <<, /^(a|an)$/)))
27 +
28 +// NP act|serves as NP
29 +tregex: S=p < (NP=hype $+ (VP <1 (/^VB|VBZ|VBP|VBD$/ <<: /(act|serve)(s|d|ed)?/) <2 (PP <1 (IN << /as/) <2 NP=hypo)))
30 +tregex: S=p < (NP=hype $+ (VP <1 (/^VB|VBZ|VBP|VBD$/ <<: /(act|serve)(s|d|ed)?/) <2 (SBAR <1 (IN << /as/) <2 S=hypo)))
31 +tregex: S=p < (NP=hype $+ (VP <1 MD <2 (VP <1 (/^VB|VBZ|VBP|VBD$/ <<: /(act|serve)/) <2 (PP <1 (IN << /as/) <2 NP=hypo))))
32 +
33 +// identification of NP as NP
34 +tregex: NP=p <1 (NP <<- /(i|I)dentification/) <2 (PP <1 (IN << /of/) <2 (NP <1 NP=hype <2 (PP <1 (IN <<: /as/) <2 NP=hypo)))
35 +tregex: __=p < (__ <<- (NP <1 (NP <<- /(i|I)dentification/) <2 (PP <1 (IN << /of/) <2 NP=hype)) $+ (PP <1 (IN <<: /as/) <2 NP=hypo))
36 +
1 +// NP such as NP
2 +tregex: __=p < (NP=tr $+ (PP <1 (JJ <: /such/) <2 (IN <: /as/) <3 NP=arg))
3 +tregex: __=p < ((PP <1 IN <-1 NP=tr) $+ (PP <1 (JJ <: /such/) <2 (IN <: /as/) <3 NP=arg))
4 +
5 +// NP , such as NP
6 +tregex: __=p < (NP=tr $+ (/,/ $+ (PP <1 (JJ <: /such/) <2 (IN <: /as/) <3 NP=arg)))
7 +tregex: __=p < (NP=tr $+ (/,/ $+ (ADJP <1 (JJ <: /such/) <2 (PP <1 (IN <: /as/) <2 NP=arg))))
8 +tregex: __=p < (NP=tr $+ (/,/ $+ (ADJP <1 (JJ <: /such/) <2 (PP <1 (IN <: /as/) <2 NP=arg))))
9 +tregex: __=p < (NP=tr $+ (/,/ $+ (CONJP <1 (JJ <: /such/) <2 (IN <: /as/) $+ (NP=arg))))
10 +tregex: __=p < ((PP <1 IN <-1 NP=tr) $+ (/,/ $+ (PP <1 (JJ <: /such/) <2 (IN <: /as/) <3 NP=arg)))
11 +tregex: __=p < ((VP <-1 (PP <-1 NP=tr)) $+ (/,/ $+ (PP <1 (JJ <: /such/) <2 (IN <: /as/) <3 NP=arg)))
12 +
13 +// NP , including NP
14 +tregex: __=p < (NP=tr $+ (/,/ $+ (PP <<, /including/ <2 NP=arg)))
15 +tregex: __=p < ((VP <-1 (PP <-1 NP=tr)) $+ (/,/ $+ (PP <<, /including/ <2 NP=arg)))
16 +tregex: __=p < ((VP <-1 NP=tr) $+ (/,/ $+ (PP <<, /including/ <2 NP=arg)))
17 +tregex: __=p < ((VP <-1 NP=tr) $+ (/,/ $+ (VP <<, /including/ <2 NP=arg)))
18 +tregex: __=p < ((PP <1 IN <-1 NP=tr) $+ (/,/ $+ (PP <<, /including/ <2 NP=arg)))
19 +
20 +// NP including NP
21 +tregex: __=p < (NP=tr $+ (PP <<, /including/ <2 NP=arg))
22 +tregex: __=p < (NP=tr $+ (VP <<, /including/ <2 NP=arg))
23 +tregex: __=p < ((PP <1 IN <-1 NP=tr) $+ (PP <<, /including/ <2 NP=arg))
24 +tregex: __=p < (NP=tr $+ (/:/ $+ (S <1 (VP <<, /including/ <2 NP=arg))))
25 +
26 +// NP which includes NP
27 +tregex: __=p < (NP=tr $+ (SBAR <<, /which|that/ <2 (S <1 (VP <<, /include/ <2 NP=arg))))
28 +
29 +// NP , which includes NP
30 +tregex: __=p < (NP=tr $+ (/,/ $+ (SBAR <<, /which/ <2 (S <1 (VP <<, /include/ <2 NP=arg)))))
31 +tregex: __=p < ((PP <1 IN <-1 NP=tr) $+ (/,/ $+ (SBAR <<, /which/ <2 (S <1 (VP <<, /include/ <2 NP=arg)))))
32 +
33 +// classes of NP
34 +tregex: NP=p <1 (NP=tr <<- /class|variety/ !< PP) <2 (PP <<, /of/ <2 (NP=arg !< PP))
35 +tregex: NP=p <1 (NP=tr <<- /class|variety/ !< PP) <2 (PP <<, /of/ <2 (NP <1 NP=arg < PP))
1 +tregex: /,/=comma $+ SBAR=sbar
2 +operation: prune comma sbar
3 +operation: new sbar
4 +
5 +tregex: /,/=comma $+ /UCP/=ucp
6 +operation: prune comma ucp
7 +
1 +// (NP -LRB- __ -RRB-) --> (PRN -LRB- __ -RRB-)
2 +tregex: NP=np <1 -LRB- <3 -RRB-=rrb <- =rrb
3 +operation: relabel np PRN
4 +
5 +// LRB is not the first child of the parent
6 +tregex: -LRB-=lrb $+ (__=xx $+ -RRB-=rrb) !>1 __
7 +operation: adjoinF (PRN @) lrb, move xx $- lrb, move rrb $- xx
8 +
9 +// PRN is not the last child of the parent
10 +tregex: PRN=prn !>- __ $- NP=np
11 +operation: adjoinF (NP @) np, move prn $- np
12 +
13 +// If the ref NP has PP, find the right most NP
14 +tregex: PRN=prn $- (NP < PP <<- (NP=np !< PP))
15 +operation: adjoinF (NP @) np, move prn $- np
16 +
17 +// If the ref is VP, find the right most NP
18 +tregex: PRN=prn $- (VP <<- (NP=np !< PP))
19 +operation: adjoinF (NP @) np, move prn $- np
20 +
1 +// NP (PRN L NP R)
2 +tregex: /^NP/=n0 <1 (/^NP/=ref !< PP !< SBAR) <2 (PRN=n3 <1 (-LRB-=n4) <2 (/^NP|QP|ADJP|NN/=elements) <3 (-RRB-=n7) <- =n7) <- =n3
3 +tregex: __=n0 < (/^ADJP|JJ/=ref $+ (PRN=n2 <1 (-LRB-=n3) <2 (/^NP/=elements) <3 (-RRB-=n5) <- =n5))
4 +
1 +tregex: NP < (NN=n1 $+ (-LRB-=lrb $+ (NN=n2 $+ (NN=n3 $+ -RRB-=rrb))))
2 +operation: prune lrb n2 n3 rrb
3 +operation: prune n1 lrb rrb
4 +
5 +tregex: NP=n0 <1 (NP=n1 !< PP) <2 (PRN=n3 <1 (-LRB-=n4) <2 (NP|QP|ADJP=n5) <3 (-RRB-=n7) <- =n7) <- =n3
6 +operation: replace n0 n1
7 +operation: replace n0 n5
8 +
9 +tregex: ADJP=n0 <1 (JJ=n1) <2 (PRN=n2 <1 (-LRB-=n3) <2 (NP=n4) <3 (-RRB-=n6) <- =n6) <- =n2
10 +operation: replace n2 n4
11 +
12 +tregex: S=n0 <1 (PRN=n1 <1 (-LRB-=n2) <2 (NP=n3) <3 (-RRB-=n13) <- =n13) <- =n1
13 +operation: prune n1
14 +
15 +tregex: S=n0 <1 (-LRB-=n1) <2 (NP=n2) <3 (VP=n13) <4 (-RRB-=n25) <- =n25
16 +operation: prune n0
17 +operation: new n0
18 +
19 +tregex: NP=n0 <1 (NN=n1) <2 (-LRB-=n2) <3 (CD=n3) <4 (-RRB-=n4) <- =n4
20 +operation: prune n2 n4
21 +operation: prune n2 n3 n4
22 +
23 +tregex: __ < (@/N.*|JJ|CD/=n0 $+ (-LRB-=n1 $+ (__=n2 $+ -RRB-=n3)))
24 +operation: prune n1 n2 n3
25 +operation: prune n0 n1 n3
26 +
27 +tregex: NP=n0 <1 (LST=n1 <1 (-LRB-=n2) <2 (LS=n3) <3 (-RRB-=n4) <- =n4)
28 +operation: prune n1
29 +
30 +tregex: VP=n0 <1 (VBN=n1) <2 (PRN=n2 <1 (-LRB-=n3) <2 (NP=n4) <3 (-RRB-=n6) <- =n6) <- =n2
31 +operation: replace n2 n4
32 +
33 +tregex: VP=n0 <1 (VBN=n1) <2 (ADVP=n2 <1 (RB=n3) <- =n3) <3 (PRN=n4 <1 (-LRB-=n5) <2 (NP=n6) <3 (-RRB-=n8) <- =n8) <- =n4
34 +operation: replace n2 n6, prune n4
35 +
36 +tregex: NP=n0 <1 (NP=n1 <1 (NP=n2) <2 (PP=n5 <1 (IN=n6) <2 (NP=n7) <- =n7) <- =n5) <2 (PRN=n11 <1 (-LRB-=n12) <2 (NP=n13) <3 (-RRB-=n15) <- =n15) <- =n11
37 +operation: replace n0 n1
38 +operation: replace n0 n13
39 +operation: replace n7 n13, prune n11
40 +
41 +tregex: __ < (ADJP|JJ=n0 $+ (PRN=n3 <1 (-LRB-=n4) <2 (__=n5) <3 (-RRB-=n7) <- =n7))
42 +operation: replace n3 n5
43 +
44 +tregex: __ < (NP|NN=n0 $+ (PRN=n3 <1 (-LRB-=n4) <2 (__=n5) <3 (-RRB-=n7) <- =n7))
45 +operation: replace n0 n5
46 +operation: prune n3
47 +
48 +tregex: NP=n0 <1 (NP=n1) <2 (PRN=n5 <1 (-LRB-=n6) <2 (NP=n7) <3 (/:/=n9) <4 (NP=n10) <5 (/,/=n13) <6 (NP=n14) <7 (-RRB-=n17) <- =n17) <- =n5
49 +operation: replace n0 n1
50 +operation: replace n0 n7
51 +operation: replace n0 n10
52 +operation: replace n0 n14
53 +
54 +tregex: NP=n0 <1 (NP=n1) <2 (PRN=n4 <1 (-LRB-=n5) <2 (NP=n6) <3 (/,/=n9) <4 (NP=n10) <5 (-RRB-=n13) <- =n13) <- =n4
55 +operation: replace n0 n1
56 +operation: replace n0 n6
57 +operation: replace n0 n10
58 +
59 +tregex: (PP=n0 <1 (IN=n1) <2 (NP=n2) <- =n2) $+ (PRN=n4 <1 (-LRB-=n5) <2 (NP=n6) <3 (-RRB-=n8) <- =n8)
60 +operation: prune n4
61 +operation: replace n2 n6, prune n4
62 +
63 +tregex: NP=n0 <1 (-LRB-=n1) <2 (NP=n2) <3 (-RRB-=n6) <- =n6
64 +operation: replace n0 n2
65 +
66 +#tregex: __=n0 <1 (-LRB-=n1) <2 (__=n2) <3 (-RRB-=n3)
67 +#operation: prune n1 n3
68 +
69 +tregex: NP=n0 <1 (JJ=n1) <2 (-LRB-=n2) <3 (NN=n3) <4 (CD=n4) <5 (-RRB-=n5) <6 (JJ=n6) <7 (NN=n7) <8 (NN=n8) <- =n8
70 +operation: prune n2 n5
71 +
72 +tregex: ADJP=n0 <1 (ADJP=n1) <2 (PRN=n3 <1 (-LRB-=n4) <2 (NP=n5) <3 (/,/=n8) <4 (NP=n9) <5 (-RRB-=n12) <- =n12) <- =n3
73 +operation: replace n3 n5
74 +operation: replace n3 n9
75 +
76 +tregex: NP=n0 <1 (NN=n1) <2 (NN=n2) <3 (JJ=n3) <4 (NN=n4) <5 (-LRB-=n5) <6 (NN=n6) <7 (NN=n7) <8 (-RRB-=n8) <9 (NN=n9) <- =n9
77 +operation: prune n5 n8 n6 n7
78 +operation: prune n4 n5 n8
79 +
80 +tregex: NP=n0 <1 (NP=n1) <2 (NN=n7) <3 (-LRB-=n8) <4 (NN=n9) <5 (NN=n10) <6 (-RRB-=n11) <7 (NN=n12) <- =n12
81 +operation: prune n8 n9 n10 n11
82 +operation: prune n7 n8 n11
83 +
84 +tregex: VP=n0 <1 (VBD=n1) <2 (ADVP=n2) <3 (PP=n4 <1 (IN=n5) <2 (NP=n6) <- =n6) <4 (PRN=n9 <1 (-LRB-=n10) <2 (NP=n11) <3 (/,/=n14) <4 (NP=n15) <5 (-RRB-=n21) <- =n21) <- =n9
85 +operation: prune n9
86 +operation: replace n6 n11, prune n9
87 +operation: replace n6 n15, prune n9
88 +
89 +tregex: NP=n0 <1 (NP=n1) <2 (PRN=n5 <1 (-LRB-=n6) <2 (CC|CONJP=n7) <3 (ADVP=n8) <4 (NP=n10) <5 (-RRB-=n13) <- =n13) <- =n5
90 +operation: replace n0 n1
91 +operation: replace n0 n10
92 +
93 +tregex: NP=n0 <1 (NP=n1) <2 (PRN=n6 <1 (-LRB-=n7) <2 (PP=n8 <1 (FW=n9) <- =n9) <3 (/,/=n10) <4 (NP=n11) <5 (-RRB-=n18) <- =n18) <- =n6
94 +operation: replace n0 n1
95 +operation: replace n0 n11
96 +
97 +tregex: NP=n0 <1 (NP=n1) <2 (PRN=n4 <1 (-LRB-=n5) <2 (PP=n6 <1 (FW=n7) <2 (NP=n8) <- =n8) <3 (/,/=n10) <4 (NP=n11) <5 (-RRB-=n13) <- =n13) <- =n4
98 +operation: replace n0 n1
99 +operation: replace n0 n8
100 +operation: replace n0 n11
101 +
102 +tregex: NP=n0 <1 (NP=n1) <2 (PRN=n3 <1 (-LRB-=n4) <2 (CC|CONJP=n5) <3 (NP=n6) <4 (-RRB-=n14) <- =n14) <- =n3
103 +operation: replace n0 n1
104 +operation: replace n0 n6
105 +
106 +tregex: NP=n0 <1 (NP=n1) <2 (PRN=n4 <1 (-LRB-=n5) <2 (NP=n6) <3 (/:/=n8) <4 (NP=n9) <5 (-RRB-=n11) <- =n11) <- =n4
107 +operation: replace n0 n1
108 +operation: replace n0 n6
109 +operation: replace n0 n9
110 +
111 +tregex: NP=n0 <1 (NP=n1) <2 (-LRB-=n7) <3 (CC|CONJP=n8) <4 (NP=n9) <5 (-RRB-=n16) <- =n16
112 +operation: replace n0 n1
113 +operation: replace n0 n9
114 +
115 +tregex: NN=n1 $+ (-LRB-=n2 $+ (NN|JJ=n3 $+ (NN|CD=n4 $+ (-RRB-=n5))))
116 +operation: prune n2 n3 n4 n5
117 +operation: prune n1 n2 n5
118 +
119 +tregex: NP=n0 <1 (NP=n1) <2 (-LRB-=n3) <3 (CC|CONJP=n4) <4 (NP=n5) <5 (-RRB-=n7) <- =n7
120 +operation: replace n0 n1
121 +operation: replace n0 n5
122 +
123 +tregex: NP=n0 <1 (NP=n1) <2 (PRN=n4 <1 (-LRB-=n5) <2 (NP=n6) <3 (PP=n8) <4 (-RRB-=n15) <- =n15) <- =n4
124 +operation: replace n0 n1
125 +operation: replace n1 n6, replace n4 n8
126 +
127 +tregex: NN=n5 $+ (-LRB-=n6 $+ (NN=n7 $+ (/,/=n8 $+ (NN=n9 $+ -RRB-=n10))))
128 +operation: prune n6 n7 n8 n9 n10
129 +operation: prune n5 n6 n8 n9 n10
130 +operation: prune n5 n6 n7 n8 n10
131 +
132 +tregex: NP=n0 <1 (NP=n1) <2 (PRN=n4 <1 (-LRB-=n5) <2 (PP=n6 <1 (IN=n7 <: /for/) <2 (NP=n8 <1 (NN=n9 < /example/) <- =n9) <- =n8) <3 (/,/=n10) <4 (NP=n11 <1 (NP=n12) <2 (/,/=n14) <3 (NP=n15) <- =n15) <5 (-RRB-=n17) <- =n17) <- =n4
133 +operation: replace n0 n1
134 +operation: replace n0 n11
135 +operation: replace n0 n15
1 +// ref is pp
2 +tregex: (PP <<- (NP=np !< PP)) $+ (SBAR=clause <1 (WHNP|WHPP|WHADVP) <2 (S <1 /^VP/=vp))
3 +operation: adjoinF (NP @) np, move clause $- np
4 +
5 +tregex: (PP <<- (NP=np !< PP)) $+ (/,/=comma $+ (SBAR=clause <1 (WHNP|WHPP|WHADVP) <2 (S <1 /^VP/=vp)))
6 +operation: adjoinF (NP @) np, move comma $- np, move clause $- comma
1 +// whnp relative clause
2 +tregex: /^NP/=p < (/^NP/=ref $+ (/^SBAR/=clause <1 (WHNP) <2 (S <1 /^VP/=vp)))
3 +tregex: /^NP/=p < (/^NP/=ref $+ (/^SBAR/=clause <1 (WHNP) <2 (S <1 PP|ADVP <2 /^VP/=vp)))
4 +tregex: /^NP/=p < (/^NP/=ref $+ (/,/ $+ (/^SBAR/=clause <1 (WHNP) <2 (S <1 /^VP/=vp))))
5 +tregex: /^NP/=p < (/^NP/=ref $+ (/,/ $+ (/^SBAR/=clause <1 (WHNP) <2 (S <1 PP|ADVP <2 /^VP/=vp))))
6 +// only one case
7 +tregex: /^NP/=p < (/^NP/=ref $+ (/,/ $+ (/^SBAR/=clause <1 (WHNP <<: /which/) <2 (S <1 /^NP/=vp))))
8 +
9 +// wrb relative clause
10 +tregex: /^NP/=p < (/^NP/=ref $+ (/^SBAR/=clause <1 WHADVP <2 S=s))
11 +tregex: /^NP/=p < (/^NP/=ref $+ (/,/ $+ (/^SBAR/=clause <1 WHADVP <2 S=s)))
12 +
13 +// whpp relative clause
14 +tregex: /^NP/=p < (/^NP/=ref $+ (/^SBAR/=clause <1 (WHPP <1 IN=in) <2 (S <1 /^NP/)))
15 +tregex: /^NP/=p < (/^NP/=ref $+ (/,/ $+ (/^SBAR/=clause <1 (WHPP <1 IN=in) <2 (S <1 /^NP/))))
16 +
17 +// wp$ relative clause
18 +tregex: /^NP/=p < (/^NP/=ref $+ (/,/ $+ (/^SBAR/=clause <1 /^WP[$]/ <2 S=s)))
19 +
20 +// NP of NP
21 +tregex: /^NP/=p < (/^NP/=ref1 $+ ((/^PP/ <1 IN <2 NP=ref) $+ (/^SBAR/=clause <1 (WHNP=whnp <1 /WP\$/=wp <2 __=np) <2 (S <1 /^VP/=vp))))
22 +tregex: /^NP/=p < (/^NP/=ref1 $+ ((/^PP/ <2 NP=ref2) $+ (/,/=comma $+ (/^SBAR/=clause <1 (WHNP=whnp <1 /WP\$/=wp <2 __=np) <2 (S <1 /^VP/=vp)))))
23 +tregex: /^NP/=p < (/^NP/=ref1 $+ (/,/ $+ (((/^VP/ <2 /^S|NP$/=ref2) $+ (/,/=comma $+ (/^SBAR/=clause <1 (WHNP=whnp <1 /WP\$/=wp <2 __=np) <2 (S <1 /^VP/=vp)))))))
24 +
25 +// NP of NP
26 +tregex: /^NP/=p < (/^NP/=ref1 $+ ((PP <1 IN <2 NP=ref) $+ (/^SBAR/=clause <1 (WHNP <1 WDT) <2 (S <1 /^VP/=vp))))
27 +tregex: /^NP/=p < (/^NP/=ref1 $+ ((PP <2 NP=ref) $+ (/,/=comma $+ (/^SBAR/=clause <1 (WHNP <1 WDT) <2 (S <1 /^VP/=vp)))))
28 +tregex: /^NP/=p < (/^NP/=ref1 $+ (/,/ $+ (((/^VP/ <2 /^S|NP$/=ref) $+ (/,/=comma $+ (/^SBAR/=clause <1 (WHNP <1 WDT) <2 (S <1 /^VP/=vp)))))))
29 +
30 +// as demonstrated ...
31 +#tregex: /^NP/=p < (/^NP/=ref $+ (/^SBAR/=clause <1 (IN << /as/) <2 (S <1 /^VP/=vp)))
32 +
33 +// reduced relative clause
34 +tregex: /^NP/=p <1 /^NP/=ref <2 (/^VP/=clause <1 VBN|VBG)
35 +tregex: /^NP/=p <1 /^NP/=ref <2 (/^PP/=clause <1 (VBG << /including/))
36 +tregex: /^NP/=p <1 @/N.*/=ref <2 /,/ <3 (/^VP/=clause <1 VBN|VBG)
37 +tregex: __=p < (/^NP/=ref $+ (/^VP/=clause <1 /^VBG/))
38 +tregex: __=p < (/^NP/=ref $+ (/,/ $+ (/^S/ <: (/^VP/=clause <1 /^VBG/))))
39 +#tregex: /^NP/=p <1 /^NP/=ref <2 VBN|VBG=clause
40 +
41 +#tregex: __=p < (/,/=comma $+ (S=s <: (/^VP/=clause <1 (VBG) <2 (/^SBAR/ <2 S=s2))))
42 +
1 +// whnp relative clause
2 +// 16
3 +tregex: NP=p < (NP=ref $+ (SBAR <1 (WHNP=whnp <1 /WP\$/=wp <2 __=np) <2 (S=clause)))
4 +operation: replace p ref
5 +operation: replace wp ref, relabel whnp NP
6 +
7 +// NP of NP
8 +tregex: NP=p < (NP=ref1 $+ ((PP <1 IN <2 NP=ref2) $+ (SBAR=clause <1 (WHNP=whnp <1 /WP\$/=wp <2 __=np) <2 (S <1 VP=vp))))
9 +operation: prune clause
10 +operation: replace wp ref1, relabel whnp NP
11 +operation: replace wp ref2, relabel whnp NP
12 +
13 +tregex: NP=p < (NP=ref1 $+ ((PP <2 NP=ref2) $+ (/,/=comma $+ (SBAR=clause <1 (WHNP=whnp <1 /WP\$/=wp <2 __=np) <2 (S <1 VP=vp)))))
14 +operation: prune clause comma
15 +operation: replace wp ref1, relabel whnp NP
16 +operation: replace wp ref2, relabel whnp NP
17 +
18 +tregex: NP=p < (NP=ref1 $+ (/,/ $+ (((VP <2 /^S|NP$/=ref2) $+ (/,/=comma $+ (SBAR=clause <1 (WHNP=whnp <1 /WP\$/=wp <2 __=np) <2 (S <1 VP=vp)))))))
19 +operation: prune clause comma
20 +operation: replace wp ref1, relabel whnp NP
21 +operation: replace wp ref2, relabel whnp NP
22 +
23 +// wdt relative clause
24 +// 154
25 +tregex: NP=p < (NP=ref $+ (SBAR=clause <1 (WHNP <1 WDT) <2 (S <1 VP=vp)))
26 +// 88
27 +tregex: NP=p < (NP=ref $+ (/,/ $+ (SBAR=clause <1 (WHNP <1 WDT) <2 (S <1 VP=vp))))
28 +operation: replace p ref
29 +operation: new ref vp
30 +
31 +// NP of NP
32 +tregex: NP=p < (NP=ref1 $+ ((PP <1 IN <2 NP=ref2) $+ (SBAR=clause <1 (WHNP <1 WDT) <2 (S <1 VP=vp))))
33 +operation: prune clause
34 +operation: new ref1 vp
35 +operation: new ref2 vp
36 +
37 +tregex: NP=p < (NP=ref1 $+ ((PP <2 NP=ref2) $+ (/,/=comma $+ (SBAR=clause <1 (WHNP <1 WDT) <2 (S <1 VP=vp)))))
38 +operation: prune clause comma
39 +operation: new ref1 vp
40 +operation: new ref2 vp
41 +
42 +tregex: NP=p < (NP=ref1 $+ (/,/ $+ (((VP <2 /^S|NP$/=ref2) $+ (/,/=comma $+ (SBAR=clause <1 (WHNP <1 WDT) <2 (S <1 VP=vp)))))))
43 +operation: prune clause comma
44 +operation: new ref1 vp
45 +operation: new ref2 vp
46 +
47 +// wrb relative clause
48 +// 4
49 +tregex: NP=p < (NP=ref $+ (SBAR=clause <1 WHADVP <2 S=s))
50 +// 4
51 +tregex: NP=p < (NP=ref $+ (/,/ $+ (SBAR=clause <1 WHADVP <2 S=s)))
52 +operation: replace p ref
53 +operation: new s
54 +
55 +// as demonstrated ...
56 +// 6
57 +tregex: NP=p < (NP=ref $+ (SBAR <1 (IN << /as/) <2 (S <1 VP=vp)))
58 +operation: replace p ref
59 +
60 +// whpp relative clause
61 +// 18
62 +tregex: NP=p < (NP=ref $+ (SBAR <1 (WHPP <1 IN=in) <2 (S=clause <1 NP)))
63 +operation: replace p ref
64 +operation: new clause in ref
65 +
66 +// reduced relative clause
67 +// 471
68 +tregex: NP=p <1 NP=ref <2 (VP=vp <1 VBN|VBG)
69 +// 23
70 +tregex: NP=p <1 @/N.*/=ref <2 /,/ <3 (VP=vp <1 VBN|VBG)
71 +// 1
72 +tregex: NP=p <1 NP=ref <2 VBN|VBG=vp
73 +operation: replace p ref
74 +operation: new ref vp
75 +
76 +tregex: __=p < (/,/=comma $+ (S=s <: (VP <1 (VBG) <2 (SBAR <2 S=s2))))
77 +operation: prune comma s
78 +operation: new s2
79 +
1 +// When VBN, Theme
2 +tregex: S <1 (SBAR=begin <1 WHADVP <2 (S <: (VP=tr))) <2 /,/=comma <3 (__ << NP=arg)
3 +operation: new arg tr
4 +operation: prune begin comma
5 +
6 +tregex: S <1 (SBAR=begin <1 WHADVP <2 (S <: (VP=tr))) <2 (__ << NP=arg)
7 +operation: new arg tr
8 +operation: prune begin
9 +
10 +// By VBG, Theme
11 +tregex: S <1 (PP=begin <2 (S <: (VP=tr))) <2 /,/=comma <3 (NP=arg)
12 +operation: new arg tr
13 +operation: prune begin comma
14 +
15 +tregex: S <1 (PP=begin <2 (S <: (VP=tr))) <2 (NP=arg)
16 +operation: new arg tr
17 +operation: prune begin
1 +#!/bin/bash
2 +# Batch driver: run simplify.sh once for every entry in an input directory.
3 +#
4 +# Usage: bash <this script> [INPUT_DIR]
5 +#   INPUT_DIR  directory holding the input files; defaults to the location
6 +#              that used to be hard-coded here.
7 +#
8 +# For each input path, the output path is derived by replacing the first
9 +# "input" in the path with "output" and the first ".txt" with ".isimp.txt".
10 +# simplify.sh is looked up relative to the current working directory.
11 +input_dir="${1:-/home/cmendezc/isimp_v2/inputFiles}"
12 +for f in "$input_dir"/*; do
13 +  # An unmatched glob is left as the literal pattern; skip it rather than
14 +  # handing a non-existent path to simplify.sh.
15 +  [ -e "$f" ] || continue
16 +  oFile="${f/input/output}"
17 +  printf '%s\n' "$oFile"
18 +  # Both paths are quoted so names with spaces or glob characters stay intact.
19 +  bash simplify.sh "$f" "${oFile/.txt/.isimp.txt}"
20 +done
1 +#!/bin/sh
2 +#
3 +# Launch main.Console using the jars in lib/ and the classes in bin/ located
4 +# next to this script, forwarding all command-line arguments unchanged.
5 +#
6 +# Environment:
7 +#   JAVACMD    java executable to run; used as-is when set.
8 +#   JAVA_HOME  when JAVACMD is unset, $JAVA_HOME/bin/java is used.
9 +# If neither is set, java is looked up on PATH.
10 +# Run with "sh -x" to get a command trace.
11 +
12 +# Might fail if $0 is a link
13 +TAGGER_HOME=$(dirname "$0")
14 +# "lib/*" is a Java class-path wildcard: it has to reach the JVM unexpanded,
15 +# so CLASSPATH is always used inside double quotes below.
16 +CLASSPATH="$TAGGER_HOME/lib/*:$TAGGER_HOME/bin"
17 +OUTPUT_DIR="$TAGGER_HOME/bin"
18 +
19 +# get java
20 +if [ -z "$JAVACMD" ] ; then
21 +  if [ -n "$JAVA_HOME" ] ; then
22 +    JAVACMD="$JAVA_HOME/bin/java"
23 +  else
24 +    JAVACMD=$(command -v java)
25 +  fi
26 +fi
27 +
28 +# Fail with a clear message instead of trying to execute an empty command.
29 +if [ -z "$JAVACMD" ] ; then
30 +  echo "$0: no java executable found; set JAVACMD or JAVA_HOME" >&2
31 +  exit 1
32 +fi
33 +
34 +# Every expansion is quoted so install paths and arguments containing spaces
35 +# are passed through as single words.
36 +"$JAVACMD" -Xmx1024m -classpath "$CLASSPATH:$OUTPUT_DIR" main.Console "$@"
37 +
1 +{
2 + "TYPE": "sentence",
3 + "TEXT": "The B cell-associated surface molecule CD40 functions to regulate B cell responses.",
4 + "FROM": 0,
5 + "TO ": 83,
6 + "POS ": "The_DT B_NN cell-associated_JJ surface_NN molecule_NN CD40_NN functions_VBZ to_TO regulate_VB B_NN cell_NN responses_NNS ._. ",
7 + "TREE": "(ROOT (S (NP (DT The) (NN B) (JJ cell-associated) (NN surface) (NN molecule) (NN CD40)) (VP (VBZ functions) (S (VP (TO to) (VP (VB regulate) (NP (NN B) (NN cell) (NNS responses)))))) (. .)))",
8 + "SIMP": []
9 +}
10 +{
11 + "TYPE": "sentence",
12 + "TEXT": "Cross-linking CD40 on B cells can lead to homotypic cell adhesion, IL-6 production, and, in combination with cytokines, to Ig isotype switching.",
13 + "FROM": 84,
14 + "TO ": 228,
15 + "POS ": "Cross-linking_NN CD40_NN on_IN B_NN cells_NNS can_MD lead_VB to_TO homotypic_JJ cell_NN adhesion_NN ,_, IL-6_NN production_NN ,_, and_CC ,_, in_IN combination_NN with_IN cytokines_NNS ,_, to_TO Ig_NN isotype_NN switching_NN ._. ",
16 + "TREE": "(ROOT (S (NP (NP (NN Cross-linking) (NN CD40)) (PP (IN on) (NP (NN B) (NNS cells)))) (VP (MD can) (VP (VB lead) (PP (PP (TO to) (NP (NP (JJ homotypic) (NN cell) (NN adhesion)) (, ,) (NP (NN IL-6) (NN production)) (, ,))) (CC and) (PRN (, ,) (PP (IN in) (NP (NP (NN combination)) (PP (IN with) (NP (NNS cytokines))))) (, ,)) (PP (TO to) (NP (NN Ig) (NN isotype) (NN switching)))))) (. .)))",
17 + "SIMP": []
18 +}
19 +{
20 + "TYPE": "sentence",
21 + "TEXT": "Tyrosine kinase activity is increased shortly after engagement of this receptor.",
22 + "FROM": 229,
23 + "TO ": 309,
24 + "POS ": "Tyrosine_NN kinase_NN activity_NN is_VBZ increased_VBN shortly_RB after_IN engagement_NN of_IN this_DT receptor_NN ._. ",
25 + "TREE": "(ROOT (S (NP (NN Tyrosine) (NN kinase) (NN activity)) (VP (VBZ is) (VP (VBN increased) (ADVP (RB shortly)) (PP (IN after) (NP (NP (NN engagement)) (PP (IN of) (NP (DT this) (NN receptor))))))) (. .)))",
26 + "SIMP": []
27 +}
28 +{
29 + "TYPE": "sentence",
30 + "TEXT": "Little is known about how the very early events induced by CD40 cross-linking link to cellular responses.",
31 + "FROM": 310,
32 + "TO ": 415,
33 + "POS ": "Little_JJ is_VBZ known_VBN about_IN how_WRB the_DT very_RB early_JJ events_NNS induced_VBN by_IN CD40_NN cross-linking_NN link_NN to_TO cellular_JJ responses_NNS ._. ",
34 + "TREE": "(ROOT (S (NP (JJ Little)) (VP (VBZ is) (VP (VBN known) (PP (IN about) (SBAR (WHADVP (WRB how)) (S (NP (DT the) (ADJP (RB very) (JJ early)) (NNS events)) (VP (VBN induced) (PP (IN by) (NP (NP (NN CD40) (NN cross-linking) (NN link)) (PP (TO to) (NP (JJ cellular) (NNS responses))))))))))) (. .)))",
35 + "SIMP": []
36 +}
37 +{
38 + "TYPE": "sentence",
39 + "TEXT": "In this study, we demonstrate that nuclear factor (NF)-kappa B and NF-kappa B-like transcription factors are activated after cross-linking CD40 on resting human tonsillar B cells and on B cell lines.",
40 + "FROM": 416,
41 + "TO ": 615,
42 + "POS ": "In_IN this_DT study_NN ,_, we_PRP demonstrate_VBP that_IN nuclear_JJ factor_NN -LRB-_-LRB- NF_NN -RRB-_-RRB- -_: kappa_NN B_NN and_CC NF-kappa_NN B-like_JJ transcription_NN factors_NNS are_VBP activated_VBN after_IN cross-linking_JJ CD40_NN on_IN resting_VBG human_JJ tonsillar_JJ B_NN cells_NNS and_CC on_IN B_NN cell_NN lines_NNS ._. ",
43 + "TREE": "(ROOT (S (PP (IN In) (NP (DT this) (NN study))) (, ,) (NP (PRP we)) (VP (VBP demonstrate) (SBAR (IN that) (S (NP (NP (JJ nuclear) (NN factor)) (PRN (-LRB- -LRB-) (NP (NN NF)) (-RRB- -RRB-))) (: -) (NP (NP (NN kappa) (NN B)) (CC and) (NP (NN NF-kappa) (JJ B-like) (NN transcription) (NNS factors))) (VP (VBP are) (VP (VBN activated) (PP (IN after) (NP (JJ cross-linking) (NN CD40))) (PP (PP (IN on) (S (VP (VBG resting) (NP (JJ human) (JJ tonsillar) (NN B) (NNS cells))))) (CC and) (PP (IN on) (NP (NN B) (NN cell) (NNS lines))))))))) (. .)))",
44 + "SIMP": [
45 + {
46 + "TYPE": "parenthesis",
47 + "TEXT": "nuclear factor (NF",
48 + "FROM": 451,
49 + "TO ": 469,
50 + "COMP": [
51 + {
52 + "TYPE": "referred noun phrase",
53 + "FROM": 451,
54 + "TO ": 465
55 + },
56 + {
57 + "TYPE": "parenthesized elements",
58 + "FROM": 467,
59 + "TO ": 469
60 + }
61 + ]
62 + },
63 + {
64 + "TYPE": "noun or noun phrase coordination",
65 + "TEXT": "kappa B and NF-kappa B-like transcription factors",
66 + "FROM": 471,
67 + "TO ": 520,
68 + "COMP": [
69 + {
70 + "TYPE": "conjunct",
71 + "FROM": 471,
72 + "TO ": 478
73 + },
74 + {
75 + "TYPE": "conjunction",
76 + "FROM": 479,
77 + "TO ": 482
78 + },
79 + {
80 + "TYPE": "conjunct",
81 + "FROM": 483,
82 + "TO ": 520
83 + }
84 + ]
85 + },
86 + {
87 + "TYPE": "prep or prep phrase coordination",
88 + "TEXT": "on resting human tonsillar B cells and on B cell lines",
89 + "FROM": 560,
90 + "TO ": 614,
91 + "COMP": [
92 + {
93 + "TYPE": "conjunct",
94 + "FROM": 560,
95 + "TO ": 594
96 + },
97 + {
98 + "TYPE": "conjunction",
99 + "FROM": 595,
100 + "TO ": 598
101 + },
102 + {
103 + "TYPE": "conjunct",
104 + "FROM": 599,
105 + "TO ": 614
106 + }
107 + ]
108 + }
109 + ]
110 +}
111 +{
112 + "TYPE": "sentence",
113 + "TEXT": "The activation is rapid and is mediated through a tyrosine kinase-dependent pathway.",
114 + "FROM": 616,
115 + "TO ": 700,
116 + "POS ": "The_DT activation_NN is_VBZ rapid_JJ and_CC is_VBZ mediated_VBN through_IN a_DT tyrosine_NN kinase-dependent_JJ pathway_NN ._. ",
117 + "TREE": "(ROOT (S (NP (DT The) (NN activation)) (VP (VP (VBZ is) (ADJP (JJ rapid))) (CC and) (VP (VBZ is) (VP (VBN mediated) (PP (IN through) (NP (DT a) (NN tyrosine) (JJ kinase-dependent) (NN pathway)))))) (. .)))",
118 + "SIMP": [
119 + {
120 + "TYPE": "verb or verb phrase coordination",
121 + "TEXT": "is rapid and is mediated through a tyrosine kinase-dependent pathway",
122 + "FROM": 631,
123 + "TO ": 699,
124 + "COMP": [
125 + {
126 + "TYPE": "conjunct",
127 + "FROM": 631,
128 + "TO ": 639
129 + },
130 + {
131 + "TYPE": "conjunction",
132 + "FROM": 640,
133 + "TO ": 643
134 + },
135 + {
136 + "TYPE": "conjunct",
137 + "FROM": 644,
138 + "TO ": 699
139 + }
140 + ]
141 + }
142 + ]
143 +}
144 +{
145 + "TYPE": "sentence",
146 + "TEXT": "The complexes detected in electrophoretic mobility shift assays contain p50, p65 (RelA), c-Rel, and most likely other components.",
147 + "FROM": 701,
148 + "TO ": 830,
149 + "POS ": "The_DT complexes_NNS detected_VBN in_IN electrophoretic_JJ mobility_NN shift_NN assays_NNS contain_VBP p50_NN ,_, p65_NN -LRB-_-LRB- RelA_NN -RRB-_-RRB- ,_, c-Rel_NN ,_, and_CC most_RBS likely_JJ other_JJ components_NNS ._. ",
150 + "TREE": "(ROOT (S (NP (NP (DT The) (NNS complexes)) (VP (VBN detected) (PP (IN in) (NP (JJ electrophoretic) (NN mobility) (NN shift) (NNS assays))))) (VP (VBP contain) (NP (NP (NN p50) (, ,) (NN p65) (PRN (-LRB- -LRB-) (NN RelA) (-RRB- -RRB-))) (, ,) (NP (NN c-Rel)) (, ,) (CC and) (NP (ADJP (RBS most) (JJ likely)) (JJ other) (NNS components)))) (. .)))",
151 + "SIMP": [
152 + {
153 + "TYPE": "reduced relative clause",
154 + "TEXT": "The complexes detected in electrophoretic mobility shift assays",
155 + "FROM": 701,
156 + "TO ": 764,
157 + "COMP": [
158 + {
159 + "TYPE": "referred noun phrase",
160 + "FROM": 701,
161 + "TO ": 714
162 + },
163 + {
164 + "TYPE": "clause",
165 + "FROM": 715,
166 + "TO ": 764
167 + }
168 + ]
169 + },
170 + {
171 + "TYPE": "noun or noun phrase coordination",
172 + "TEXT": "p50, p65 (RelA), c-Rel, and most likely other components",
173 + "FROM": 773,
174 + "TO ": 829,
175 + "COMP": [
176 + {
177 + "TYPE": "conjunct",
178 + "FROM": 773,
179 + "TO ": 788
180 + },
181 + {
182 + "TYPE": "conjunct",
183 + "FROM": 790,
184 + "TO ": 795
185 + },
186 + {
187 + "TYPE": "conjunction",
188 + "FROM": 797,
189 + "TO ": 800
190 + },
191 + {
192 + "TYPE": "conjunct",
193 + "FROM": 801,
194 + "TO ": 829
195 + }
196 + ]
197 + }
198 + ]
199 +}
200 +{
201 + "TYPE": "sentence",
202 + "TEXT": "By using transient transfection assays, we found that cross-linking CD40 supports NF-kappa B-dependent gene expression.",
203 + "FROM": 831,
204 + "TO ": 950,
205 + "POS ": "By_IN using_VBG transient_JJ transfection_NN assays_NNS ,_, we_PRP found_VBD that_IN cross-linking_JJ CD40_NN supports_VBZ NF-kappa_NN B-dependent_JJ gene_NN expression_NN ._. ",
206 + "TREE": "(ROOT (S (PP (IN By) (S (VP (VBG using) (NP (JJ transient) (NN transfection) (NNS assays))))) (, ,) (NP (PRP we)) (VP (VBD found) (SBAR (IN that) (S (NP (JJ cross-linking) (NN CD40)) (VP (VBZ supports) (NP (NP (NN NF-kappa)) (NP (JJ B-dependent) (NN gene) (NN expression))))))) (. .)))",
207 + "SIMP": []
208 +}
209 +{
210 + "TYPE": "sentence",
211 + "TEXT": "Our results define the NF-kappa B system as an intermediate event in CD40 signaling and suggest that the CD40 pathway can influence the expression of B cell-associated genes with NF-kappa B consensus sites.",
212 + "FROM": 951,
213 + "TO ": 1157,
214 + "POS ": "Our_PRP$ results_NNS define_VBP the_DT NF-kappa_NN B_NN system_NN as_IN an_DT intermediate_JJ event_NN in_IN CD40_NN signaling_NN and_CC suggest_VBP that_IN the_DT CD40_NN pathway_NN can_MD influence_VB the_DT expression_NN of_IN B_NN cell-associated_JJ genes_NNS with_IN NF-kappa_NN B_NN consensus_NN sites_NNS ._. ",
215 + "TREE": "(ROOT (S (NP (PRP$ Our) (NNS results)) (VP (VP (VBP define) (NP (DT the) (NN NF-kappa) (NN B) (NN system)) (PP (IN as) (NP (NP (DT an) (JJ intermediate) (NN event)) (PP (IN in) (NP (NN CD40) (NN signaling)))))) (CC and) (VP (VBP suggest) (SBAR (IN that) (S (NP (DT the) (NN CD40) (NN pathway)) (VP (MD can) (VP (VB influence) (NP (NP (DT the) (NN expression)) (PP (IN of) (NP (NN B) (JJ cell-associated) (NNS genes)))) (PP (IN with) (NP (NN NF-kappa) (NN B) (NN consensus) (NNS sites))))))))) (. .)))",
216 + "SIMP": [
217 + {
218 + "TYPE": "verb or verb phrase coordination",
219 + "TEXT": "define the NF-kappa B system as an intermediate event in CD40 signaling and suggest that the CD40 pathway can influence the expression of B cell-associated genes with NF-kappa B consensus sites",
220 + "FROM": 963,
221 + "TO ": 1156,
222 + "COMP": [
223 + {
224 + "TYPE": "conjunct",
225 + "FROM": 963,
226 + "TO ": 1034
227 + },
228 + {
229 + "TYPE": "conjunction",
230 + "FROM": 1035,
231 + "TO ": 1038
232 + },
233 + {
234 + "TYPE": "conjunct",
235 + "FROM": 1039,
236 + "TO ": 1156
237 + }
238 + ]
239 + }
240 + ]
241 +}
1 +{
2 + "TYPE": "sentence",
3 + "TEXT": "The B cell-associated surface molecule CD40 functions to regulate B cell responses.",
4 + "FROM": 0,
5 + "TO ": 83,
6 + "POS ": "The_DT B_NN cell-associated_JJ surface_NN molecule_NN CD40_NN functions_VBZ to_TO regulate_VB B_NN cell_NN responses_NNS ._. ",
7 + "TREE": "(ROOT (S (NP (DT The) (NN B) (JJ cell-associated) (NN surface) (NN molecule) (NN CD40)) (VP (VBZ functions) (S (VP (TO to) (VP (VB regulate) (NP (NN B) (NN cell) (NNS responses)))))) (. .)))",
8 + "SIMP": []
9 +}
10 +{
11 + "TYPE": "sentence",
12 + "TEXT": "Cross-linking CD40 on B cells can lead to homotypic cell adhesion, IL-6 production, and, in combination with cytokines, to Ig isotype switching.",
13 + "FROM": 84,
14 + "TO ": 228,
15 + "POS ": "Cross-linking_NN CD40_NN on_IN B_NN cells_NNS can_MD lead_VB to_TO homotypic_JJ cell_NN adhesion_NN ,_, IL-6_NN production_NN ,_, and_CC ,_, in_IN combination_NN with_IN cytokines_NNS ,_, to_TO Ig_NN isotype_NN switching_NN ._. ",
16 + "TREE": "(ROOT (S (NP (NP (NN Cross-linking) (NN CD40)) (PP (IN on) (NP (NN B) (NNS cells)))) (VP (MD can) (VP (VB lead) (PP (PP (TO to) (NP (NP (JJ homotypic) (NN cell) (NN adhesion)) (, ,) (NP (NN IL-6) (NN production)) (, ,))) (CC and) (PRN (, ,) (PP (IN in) (NP (NP (NN combination)) (PP (IN with) (NP (NNS cytokines))))) (, ,)) (PP (TO to) (NP (NN Ig) (NN isotype) (NN switching)))))) (. .)))",
17 + "SIMP": []
18 +}
19 +{
20 + "TYPE": "sentence",
21 + "TEXT": "Tyrosine kinase activity is increased shortly after engagement of this receptor.",
22 + "FROM": 229,
23 + "TO ": 309,
24 + "POS ": "Tyrosine_NN kinase_NN activity_NN is_VBZ increased_VBN shortly_RB after_IN engagement_NN of_IN this_DT receptor_NN ._. ",
25 + "TREE": "(ROOT (S (NP (NN Tyrosine) (NN kinase) (NN activity)) (VP (VBZ is) (VP (VBN increased) (ADVP (RB shortly)) (PP (IN after) (NP (NP (NN engagement)) (PP (IN of) (NP (DT this) (NN receptor))))))) (. .)))",
26 + "SIMP": []
27 +}
28 +{
29 + "TYPE": "sentence",
30 + "TEXT": "Little is known about how the very early events induced by CD40 cross-linking link to cellular responses.",
31 + "FROM": 310,
32 + "TO ": 415,
33 + "POS ": "Little_JJ is_VBZ known_VBN about_IN how_WRB the_DT very_RB early_JJ events_NNS induced_VBN by_IN CD40_NN cross-linking_NN link_NN to_TO cellular_JJ responses_NNS ._. ",
34 + "TREE": "(ROOT (S (NP (JJ Little)) (VP (VBZ is) (VP (VBN known) (PP (IN about) (SBAR (WHADVP (WRB how)) (S (NP (DT the) (ADJP (RB very) (JJ early)) (NNS events)) (VP (VBN induced) (PP (IN by) (NP (NP (NN CD40) (NN cross-linking) (NN link)) (PP (TO to) (NP (JJ cellular) (NNS responses))))))))))) (. .)))",
35 + "SIMP": []
36 +}
37 +{
38 + "TYPE": "sentence",
39 + "TEXT": "In this study, we demonstrate that nuclear factor (NF)-kappa B and NF-kappa B-like transcription factors are activated after cross-linking CD40 on resting human tonsillar B cells and on B cell lines.",
40 + "FROM": 416,
41 + "TO ": 615,
42 + "POS ": "In_IN this_DT study_NN ,_, we_PRP demonstrate_VBP that_IN nuclear_JJ factor_NN -LRB-_-LRB- NF_NN -RRB-_-RRB- -_: kappa_NN B_NN and_CC NF-kappa_NN B-like_JJ transcription_NN factors_NNS are_VBP activated_VBN after_IN cross-linking_JJ CD40_NN on_IN resting_VBG human_JJ tonsillar_JJ B_NN cells_NNS and_CC on_IN B_NN cell_NN lines_NNS ._. ",
43 + "TREE": "(ROOT (S (PP (IN In) (NP (DT this) (NN study))) (, ,) (NP (PRP we)) (VP (VBP demonstrate) (SBAR (IN that) (S (NP (NP (JJ nuclear) (NN factor)) (PRN (-LRB- -LRB-) (NP (NN NF)) (-RRB- -RRB-))) (: -) (NP (NP (NN kappa) (NN B)) (CC and) (NP (NN NF-kappa) (JJ B-like) (NN transcription) (NNS factors))) (VP (VBP are) (VP (VBN activated) (PP (IN after) (NP (JJ cross-linking) (NN CD40))) (PP (PP (IN on) (S (VP (VBG resting) (NP (JJ human) (JJ tonsillar) (NN B) (NNS cells))))) (CC and) (PP (IN on) (NP (NN B) (NN cell) (NNS lines))))))))) (. .)))",
44 + "SIMP": [
45 + {
46 + "TYPE": "parenthesis",
47 + "TEXT": "nuclear factor (NF",
48 + "FROM": 451,
49 + "TO ": 469,
50 + "COMP": [
51 + {
52 + "TYPE": "referred noun phrase",
53 + "FROM": 451,
54 + "TO ": 465
55 + },
56 + {
57 + "TYPE": "parenthesized elements",
58 + "FROM": 467,
59 + "TO ": 469
60 + }
61 + ]
62 + },
63 + {
64 + "TYPE": "noun or noun phrase coordination",
65 + "TEXT": "kappa B and NF-kappa B-like transcription factors",
66 + "FROM": 471,
67 + "TO ": 520,
68 + "COMP": [
69 + {
70 + "TYPE": "conjunct",
71 + "FROM": 471,
72 + "TO ": 478
73 + },
74 + {
75 + "TYPE": "conjunction",
76 + "FROM": 479,
77 + "TO ": 482
78 + },
79 + {
80 + "TYPE": "conjunct",
81 + "FROM": 483,
82 + "TO ": 520
83 + }
84 + ]
85 + },
86 + {
87 + "TYPE": "prep or prep phrase coordination",
88 + "TEXT": "on resting human tonsillar B cells and on B cell lines",
89 + "FROM": 560,
90 + "TO ": 614,
91 + "COMP": [
92 + {
93 + "TYPE": "conjunct",
94 + "FROM": 560,
95 + "TO ": 594
96 + },
97 + {
98 + "TYPE": "conjunction",
99 + "FROM": 595,
100 + "TO ": 598
101 + },
102 + {
103 + "TYPE": "conjunct",
104 + "FROM": 599,
105 + "TO ": 614
106 + }
107 + ]
108 + }
109 + ]
110 +}
111 +{
112 + "TYPE": "sentence",
113 + "TEXT": "The activation is rapid and is mediated through a tyrosine kinase-dependent pathway.",
114 + "FROM": 616,
115 + "TO ": 700,
116 + "POS ": "The_DT activation_NN is_VBZ rapid_JJ and_CC is_VBZ mediated_VBN through_IN a_DT tyrosine_NN kinase-dependent_JJ pathway_NN ._. ",
117 + "TREE": "(ROOT (S (NP (DT The) (NN activation)) (VP (VP (VBZ is) (ADJP (JJ rapid))) (CC and) (VP (VBZ is) (VP (VBN mediated) (PP (IN through) (NP (DT a) (NN tyrosine) (JJ kinase-dependent) (NN pathway)))))) (. .)))",
118 + "SIMP": [
119 + {
120 + "TYPE": "verb or verb phrase coordination",
121 + "TEXT": "is rapid and is mediated through a tyrosine kinase-dependent pathway",
122 + "FROM": 631,
123 + "TO ": 699,
124 + "COMP": [
125 + {
126 + "TYPE": "conjunct",
127 + "FROM": 631,
128 + "TO ": 639
129 + },
130 + {
131 + "TYPE": "conjunction",
132 + "FROM": 640,
133 + "TO ": 643
134 + },
135 + {
136 + "TYPE": "conjunct",
137 + "FROM": 644,
138 + "TO ": 699
139 + }
140 + ]
141 + }
142 + ]
143 +}
144 +{
145 + "TYPE": "sentence",
146 + "TEXT": "The complexes detected in electrophoretic mobility shift assays contain p50, p65 (RelA), c-Rel, and most likely other components.",
147 + "FROM": 701,
148 + "TO ": 830,
149 + "POS ": "The_DT complexes_NNS detected_VBN in_IN electrophoretic_JJ mobility_NN shift_NN assays_NNS contain_VBP p50_NN ,_, p65_NN -LRB-_-LRB- RelA_NN -RRB-_-RRB- ,_, c-Rel_NN ,_, and_CC most_RBS likely_JJ other_JJ components_NNS ._. ",
150 + "TREE": "(ROOT (S (NP (NP (DT The) (NNS complexes)) (VP (VBN detected) (PP (IN in) (NP (JJ electrophoretic) (NN mobility) (NN shift) (NNS assays))))) (VP (VBP contain) (NP (NP (NN p50) (, ,) (NN p65) (PRN (-LRB- -LRB-) (NN RelA) (-RRB- -RRB-))) (, ,) (NP (NN c-Rel)) (, ,) (CC and) (NP (ADJP (RBS most) (JJ likely)) (JJ other) (NNS components)))) (. .)))",
151 + "SIMP": [
152 + {
153 + "TYPE": "reduced relative clause",
154 + "TEXT": "The complexes detected in electrophoretic mobility shift assays",
155 + "FROM": 701,
156 + "TO ": 764,
157 + "COMP": [
158 + {
159 + "TYPE": "referred noun phrase",
160 + "FROM": 701,
161 + "TO ": 714
162 + },
163 + {
164 + "TYPE": "clause",
165 + "FROM": 715,
166 + "TO ": 764
167 + }
168 + ]
169 + },
170 + {
171 + "TYPE": "noun or noun phrase coordination",
172 + "TEXT": "p50, p65 (RelA), c-Rel, and most likely other components",
173 + "FROM": 773,
174 + "TO ": 829,
175 + "COMP": [
176 + {
177 + "TYPE": "conjunct",
178 + "FROM": 773,
179 + "TO ": 788
180 + },
181 + {
182 + "TYPE": "conjunct",
183 + "FROM": 790,
184 + "TO ": 795
185 + },
186 + {
187 + "TYPE": "conjunction",
188 + "FROM": 797,
189 + "TO ": 800
190 + },
191 + {
192 + "TYPE": "conjunct",
193 + "FROM": 801,
194 + "TO ": 829
195 + }
196 + ]
197 + }
198 + ]
199 +}
200 +{
201 + "TYPE": "sentence",
202 + "TEXT": "By using transient transfection assays, we found that cross-linking CD40 supports NF-kappa B-dependent gene expression.",
203 + "FROM": 831,
204 + "TO ": 950,
205 + "POS ": "By_IN using_VBG transient_JJ transfection_NN assays_NNS ,_, we_PRP found_VBD that_IN cross-linking_JJ CD40_NN supports_VBZ NF-kappa_NN B-dependent_JJ gene_NN expression_NN ._. ",
206 + "TREE": "(ROOT (S (PP (IN By) (S (VP (VBG using) (NP (JJ transient) (NN transfection) (NNS assays))))) (, ,) (NP (PRP we)) (VP (VBD found) (SBAR (IN that) (S (NP (JJ cross-linking) (NN CD40)) (VP (VBZ supports) (NP (NP (NN NF-kappa)) (NP (JJ B-dependent) (NN gene) (NN expression))))))) (. .)))",
207 + "SIMP": []
208 +}
209 +{
210 + "TYPE": "sentence",
211 + "TEXT": "Our results define the NF-kappa B system as an intermediate event in CD40 signaling and suggest that the CD40 pathway can influence the expression of B cell-associated genes with NF-kappa B consensus sites.",
212 + "FROM": 951,
213 + "TO ": 1157,
214 + "POS ": "Our_PRP$ results_NNS define_VBP the_DT NF-kappa_NN B_NN system_NN as_IN an_DT intermediate_JJ event_NN in_IN CD40_NN signaling_NN and_CC suggest_VBP that_IN the_DT CD40_NN pathway_NN can_MD influence_VB the_DT expression_NN of_IN B_NN cell-associated_JJ genes_NNS with_IN NF-kappa_NN B_NN consensus_NN sites_NNS ._. ",
215 + "TREE": "(ROOT (S (NP (PRP$ Our) (NNS results)) (VP (VP (VBP define) (NP (DT the) (NN NF-kappa) (NN B) (NN system)) (PP (IN as) (NP (NP (DT an) (JJ intermediate) (NN event)) (PP (IN in) (NP (NN CD40) (NN signaling)))))) (CC and) (VP (VBP suggest) (SBAR (IN that) (S (NP (DT the) (NN CD40) (NN pathway)) (VP (MD can) (VP (VB influence) (NP (NP (DT the) (NN expression)) (PP (IN of) (NP (NN B) (JJ cell-associated) (NNS genes)))) (PP (IN with) (NP (NN NF-kappa) (NN B) (NN consensus) (NNS sites))))))))) (. .)))",
216 + "SIMP": [
217 + {
218 + "TYPE": "verb or verb phrase coordination",
219 + "TEXT": "define the NF-kappa B system as an intermediate event in CD40 signaling and suggest that the CD40 pathway can influence the expression of B cell-associated genes with NF-kappa B consensus sites",
220 + "FROM": 963,
221 + "TO ": 1156,
222 + "COMP": [
223 + {
224 + "TYPE": "conjunct",
225 + "FROM": 963,
226 + "TO ": 1034
227 + },
228 + {
229 + "TYPE": "conjunction",
230 + "FROM": 1035,
231 + "TO ": 1038
232 + },
233 + {
234 + "TYPE": "conjunct",
235 + "FROM": 1039,
236 + "TO ": 1156
237 + }
238 + ]
239 + }
240 + ]
241 +}
1 +The B cell-associated surface molecule CD40 functions to regulate B cell responses. Cross-linking CD40 on B cells can lead to homotypic cell adhesion, IL-6 production, and, in combination with cytokines, to Ig isotype switching. Tyrosine kinase activity is increased shortly after engagement of this receptor. Little is known about how the very early events induced by CD40 cross-linking link to cellular responses. In this study, we demonstrate that nuclear factor (NF)-kappa B and NF-kappa B-like transcription factors are activated after cross-linking CD40 on resting human tonsillar B cells and on B cell lines. The activation is rapid and is mediated through a tyrosine kinase-dependent pathway. The complexes detected in electrophoretic mobility shift assays contain p50, p65 (RelA), c-Rel, and most likely other components. By using transient transfection assays, we found that cross-linking CD40 supports NF-kappa B-dependent gene expression. Our results define the NF-kappa B system as an intermediate event in CD40 signaling and suggest that the CD40 pathway can influence the expression of B cell-associated genes with NF-kappa B consensus sites.
...\ No newline at end of file ...\ No newline at end of file
#!/bin/bash
#
# Driver for the sentence-simplification pipeline.
#
# Usage: ./sentence-simplification-main.sh <input_path> <output_file_path>
#
#   <input_path>        directory whose files are fed, one by one, to iSimp
#   <output_file_path>  index file; truncated here and then passed to
#                       simplifier.py as its third argument
#
# Steps:
#   1. Run isimp_v2/simplify.sh on every file of <input_path>, writing each
#      result to iSimp_sentences/<same basename>.
#   2. Truncate the index file.
#   3. Run simplifier.py (python2) on every file of iSimp_sentences/, with
#      algorithm_sentences/<same basename> as its output argument.
#
# NOTE(review): the script changes into its own directory before the
# arguments are used, so relative paths are resolved against the script
# directory, not the caller's working directory. Kept as-is for backward
# compatibility -- confirm this is intended.

# Validate arguments
if [[ "$#" -ne 2 ]]; then
  echo 'Usage: ./sentence-simplification-main.sh <input_path> <output_file_path>'
  exit 1
fi

SCRIPT_PATH=$(cd "$(dirname "$0")" && pwd)
INPUT_PATH=$1
OUTPUT_INDEX_FILE_PATH=$2
cd "$SCRIPT_PATH" || exit 1

# Unmatched globs expand to nothing, so empty directories produce zero loop
# iterations instead of one iteration over the literal "*" pattern.
shopt -s nullglob

# Remove the non-hidden entries left in a work directory by a previous run.
# The directory is created when it does not exist yet.
# Arguments: $1 - work directory
clean_work_dir() {
  local dir=$1
  local -a stale
  mkdir -p -- "$dir" || exit 1
  stale=("$dir"/*)
  if (( ${#stale[@]} > 0 )); then
    rm -- "${stale[@]}"
  fi
}

# Analyse in iSimp
echo "Analysing in iSimp..."
clean_work_dir "$SCRIPT_PATH/iSimp_sentences"
for j in "$INPUT_PATH"/*; do
  printf '%s\n' "$j"
  "$SCRIPT_PATH/isimp_v2/simplify.sh" "$j" \
    "$SCRIPT_PATH/iSimp_sentences/$(basename -- "$j")"
done

# Create (truncate) the index of simplified files
: >| "$OUTPUT_INDEX_FILE_PATH"

# Feed the iSimp output to the algorithm
echo "Analysing in Algorithm..."
clean_work_dir "$SCRIPT_PATH/algorithm_sentences"
for k in "$SCRIPT_PATH"/iSimp_sentences/*; do
  printf '%s\n' "$k"
  python2 "$SCRIPT_PATH/simplifier.py" "$k" \
    "$SCRIPT_PATH/algorithm_sentences/$(basename -- "$k")" \
    "$OUTPUT_INDEX_FILE_PATH"
done
1 +import copy
2 +import sys
3 +import requests
4 +
class Simp(object):
    """One simplification construct (SIMP entry) attached to a sentence."""

    def __init__(self):
        # Construct label and the text span it covers.
        self.TYPE = self.TEXT = ""
        # Numeric bounds; both start at zero and are filled in by the caller.
        self.TYPEx = self.TYPEy = 0
        # Component entries, appended one at a time through agregarCOMP().
        self.COMP = list()

    def agregarTYPE(self, Type):
        """Store the construct type label."""
        self.TYPE = Type

    def agregarTEXT(self, text):
        """Store the text covered by the construct."""
        self.TEXT = text

    def agregarCOMP(self, comp):
        """Append one component entry to COMP."""
        self.COMP += [comp]
18 +
class Frase(object):
    """Record for one analysed sentence as read from the input file."""

    def __init__(self):
        # All textual fields start out empty.
        self.TYPE = self.TEXT = self.POS = self.TREE = ""
        # Simp objects, one appended per agregarSIMP() call.
        self.SIMP = list()

    def agregarTYPE(self, Type):
        """Store the record type."""
        self.TYPE = Type

    def agregarTEXT(self, text):
        """Store the sentence text."""
        self.TEXT = text

    def agregarPOS(self, Pos):
        """Store the POS string."""
        self.POS = Pos

    def agregarTREE(self, Tree):
        """Store the parse-tree string."""
        self.TREE = Tree

    def agregarSIMP(self):
        """Append a fresh, empty Simp to SIMP."""
        nuevo = Simp()
        self.SIMP.append(nuevo)
36 +
class Sentence(object):
    """A candidate sentence handled by the simplification loop."""

    def __init__(self):
        # FLAG starts as True; the main program toggles it while processing.
        self.FLAG = True
        self.TEXT = self.TREE = ""
        # Simp objects, one appended per agregarSIMP() call.
        self.SIMP = list()

    def agregarTEXT(self, text):
        """Store the sentence text."""
        self.TEXT = text

    def agregarTREE(self, Tree):
        """Store the parse-tree string."""
        self.TREE = Tree

    def agregarSIMP(self):
        """Append a fresh, empty Simp to SIMP."""
        nuevo = Simp()
        self.SIMP.append(nuevo)
49 +
50 +
# Work lists. MEMORIAB receives the Sentence objects handled by the main
# program below; MEMORIAA is not referenced in the visible part of the file.
MEMORIAB = []
MEMORIAA = []


# ---- Read the input data from the file named by the first argument
arch = sys.argv[1]
# A context manager guarantees the file is closed; the previous code wrote
# `f.close` without parentheses, which never called the method.
with open(arch) as f:
    dato = f.read().splitlines()

# Fill a single Frase record from the labelled lines. Sentence-level labels
# are matched on the first 6 characters; construct-level labels are matched
# within the first 8 characters and belong to the most recent SIMP entry.
frase = Frase()
for linea in dato:
    if 'TYPE: ' in linea[0:6]:
        frase.agregarTYPE(linea[6:])
    elif 'TEXT: ' in linea[0:6]:
        frase.agregarTEXT(linea[6:])
    elif 'POS : ' in linea[0:6]:
        frase.agregarPOS(linea[6:])
    elif 'TREE: ' in linea[0:6]:
        frase.agregarTREE(linea[6:])
    elif 'SIMP:' in linea:
        # Each SIMP header opens a new construct.
        frase.agregarSIMP()
    elif ' TYPE: ' in linea[0:8]:
        frase.SIMP[-1].agregarTYPE(linea[8:])
    elif ' TEXT: ' in linea[0:8]:
        frase.SIMP[-1].agregarTEXT(linea[8:])
    elif ' COMP: ' in linea:
        frase.SIMP[-1].agregarCOMP(linea[8:])
# ------------
78 +#------------
79 +
80 +
81 +#-------Programa principal
82 +#Algoritmo v4
83 +
84 +
85 +if ((frase.TYPE.find('sentence')) !=- 1) and (frase.SIMP!=[]) and (frase.SIMP[0].TYPE != ''):
86 + y=1
87 + w=1
88 + SIMPworkspace=[]
89 + # copia TREE y cada SIMP a SENTENCE.1
90 + Sentence1=Sentence()
91 + Sentence1.TREE=copy.deepcopy(frase.TREE)
92 + Sentence1.TEXT=copy.deepcopy(frase.TEXT)
93 + for i in range(len(frase.SIMP)):
94 + #Sentence1.SIMP.append(Simp())
95 + #Sentence1.SIMP[i]=copy.deepcopy(frase.SIMP[i])
96 + SIMPworkspace.append(Simp())
97 + SIMPworkspace[i]=copy.deepcopy(frase.SIMP[i])
98 +
99 +## ORDENAMIENTO DE SIMPs
100 + for i in range(len(SIMPworkspace)):
101 + #print SIMPworkspace[i].TEXT
102 + #print SIMPworkspace[i].TYPE
103 + SIMPworkspace[i].TYPEx = int(SIMPworkspace[i].TYPE[SIMPworkspace[i].TYPE.find('[')+1:SIMPworkspace[i].TYPE.find('..')])
104 + SIMPworkspace[i].TYPEy = int(SIMPworkspace[i].TYPE[SIMPworkspace[i].TYPE.find('..')+2:SIMPworkspace[i].TYPE.find(']')])
105 + if 'parenthesis' in SIMPworkspace[i].TYPE:
106 + SIMPworkspace[i].TYPEy = SIMPworkspace[i].TYPEy + 2
107 + #print SIMPworkspace[i].TYPEx
108 + #print SIMPworkspace[i].TYPEy
109 +
110 +
111 + SIMPworkspace.sort(key=lambda x: x.TYPEy, reverse=True)
112 + SIMPworkspace.sort(key=lambda x: x.TYPEx)
113 +
114 +
115 + # for i in range(len(SIMPworkspace)):
116 + # print "\nSIMP " + str(i) + " :"
117 + # print SIMPworkspace[i].TYPE
118 + # print SIMPworkspace[i].TYPEx
119 + # print SIMPworkspace[i].TYPEy
120 + # print "\n"
121 +
122 + for i in range(len(SIMPworkspace)):
123 + Sentence1.SIMP.append(Simp())
124 + Sentence1.SIMP[i]=copy.deepcopy(SIMPworkspace[i])
125 +
126 +
127 + # Agrega la oracion original Sentence1 a la memoria como primer objeto en ser analizado
128 + MEMORIAB.append(Sentence())
129 + MEMORIAB[0]=copy.deepcopy(Sentence1)
130 +
131 +
132 +
133 + # 1 entrada al bucle A por cada SIMP diferente en Sentence1
134 + numSimp=len(Sentence1.SIMP)
135 + s = 0
136 + #bucle A
137 + while s < numSimp :
138 + #print "\nEntro por vez " + str(s) + " al bucle A"
139 + #print "Analizando todos los SIMP de tipo: " + MEMORIAB[0].SIMP[s].TYPE
140 + #Entra al bucle B el numero de veces igual al numerode elementos en MEMORIAB
141 + numMEM = len(MEMORIAB)
142 + t = 0
143 + #bucle B
144 + while t < numMEM :
145 + #print "Entro por vez " + str(t) + " al bucle B"
146 + #Entra si la oracion no ha sido analizada antes (FLAG==True) y si el texto del simp esta presente en la oracion.
147 + #print "CONDICIONES:"
148 + #print "SIMP " + MEMORIAB[0].SIMP[s].TEXT
149 + #print "SIMP " + MEMORIAB[0].SIMP[s].TYPE
150 + #print "MEMB " + str(MEMORIAB[t].FLAG)
151 + #print "MEMB " + MEMORIAB[t].TEXT
152 + if ( MEMORIAB[0].SIMP[s].TEXT in MEMORIAB[t].TEXT ) and ( MEMORIAB[t].FLAG == True ):
153 + MEMORIAB[t].FLAG = False
154 + #print "False to: " + MEMORIAB[t].TEXT
155 + #print "Entro a condicional"
156 + #Reglas de simplificacion
157 + if ( 'coordination' in MEMORIAB[t].SIMP[s].TYPE ) and ( not ('sentence coordination' in MEMORIAB[t].SIMP[s].TYPE ) ) :
158 + #print "Aplico regla coord"
159 + TEMPORALES = []
160 + c = len(MEMORIAB[t].SIMP[s].COMP)
161 + #print "Hay " + str(c) + " COMP en este SIMP"
162 + tt = 0
163 + while c > 0 :
164 + c = c - 1
165 + if ( 'conjunct' in MEMORIAB[0].SIMP[s].COMP[c] ) and ( not ( 'conjunction' in MEMORIAB[0].SIMP[s].COMP[c] ) ) :
166 + TEMPORALES.append(Sentence())
167 + TEMPORALES[tt] = copy.deepcopy(MEMORIAB[t])
168 + replaced = MEMORIAB[0].SIMP[s].TEXT
169 + indice1 = (int)(TEMPORALES[tt].SIMP[s].COMP[c][TEMPORALES[tt].SIMP[s].COMP[c].find('[')+1:TEMPORALES[tt].SIMP[s].COMP[c].find('..')])
170 + indice2 = (int)(TEMPORALES[tt].SIMP[s].COMP[c][TEMPORALES[tt].SIMP[s].COMP[c].find('..')+2:TEMPORALES[tt].SIMP[s].COMP[c].find(']')])
171 + replacer = MEMORIAB[0].TEXT[indice1:indice2]
172 + TEMPORALES[tt].TEXT = TEMPORALES[tt].TEXT.replace(replaced,replacer)
173 + tt = tt + 1
174 + #copiar simplificaciones de memoria temporal a MEMORIAB
175 + indtempamem = 0
176 + while indtempamem < len(TEMPORALES) :
177 + MEMORIAB.append(Sentence())
178 + MEMORIAB[-1]=copy.deepcopy(TEMPORALES[indtempamem])
179 + MEMORIAB[-1].FLAG = True
180 + #print MEMORIAB[-1].TEXT
181 + indtempamem = indtempamem + 1
182 + elif 'parenthesis' in MEMORIAB[t].SIMP[s].TYPE:
183 + #print "Aplico regla par"
184 + TEMPORALES = []
185 + c = len(MEMORIAB[t].SIMP[s].COMP)
186 + #print "Hay " + str(c) + " COMP en este SIMP"
187 + tt = 0
188 + while c > 0 :
189 + #print "entro al while de par"
190 + c = c - 1
191 + TEMPORALES.append(Sentence())
192 + TEMPORALES[tt] = copy.deepcopy(MEMORIAB[t])
193 + replaced = MEMORIAB[0].SIMP[s].TEXT + ' )'
194 + indice1 = (int)(TEMPORALES[tt].SIMP[s].COMP[c][TEMPORALES[tt].SIMP[s].COMP[c].find('[')+1:TEMPORALES[tt].SIMP[s].COMP[c].find('..')])
195 + indice2 = (int)(TEMPORALES[tt].SIMP[s].COMP[c][TEMPORALES[tt].SIMP[s].COMP[c].find('..')+2:TEMPORALES[tt].SIMP[s].COMP[c].find(']')])
196 + replacer = MEMORIAB[0].TEXT[indice1:indice2]
197 + #print "replaced: " + replaced
198 + #print "replacer: " + replacer
199 + TEMPORALES[tt].TEXT = TEMPORALES[tt].TEXT.replace(replaced,replacer)
200 + tt = tt + 1
201 + #copiar simplificaciones de memoria temporal a MEMORIAB
202 + indtempamem = 0
203 + while indtempamem < len(TEMPORALES) :
204 + MEMORIAB.append(Sentence())
205 + MEMORIAB[-1]=copy.deepcopy(TEMPORALES[indtempamem])
206 + MEMORIAB[-1].FLAG = True
207 + #print MEMORIAB[-1].TEXT
208 + indtempamem = indtempamem + 1
209 + elif 'apposition' in MEMORIAB[t].SIMP[s].TYPE:
210 + #print "Aplico regla Apposition"
211 + TEMPORALES = []
212 + c = len(MEMORIAB[t].SIMP[s].COMP)
213 + #print "Hay " + str(c) + " COMP en este SIMP"
214 + tt = 0
215 + while c > 0 :
216 + #print "entro al while de par"
217 + c = c - 1
218 + TEMPORALES.append(Sentence())
219 + TEMPORALES[tt] = copy.deepcopy(MEMORIAB[t])
220 + replaced = MEMORIAB[0].SIMP[s].TEXT
221 + indice1 = (int)(TEMPORALES[tt].SIMP[s].COMP[c][TEMPORALES[tt].SIMP[s].COMP[c].find('[')+1:TEMPORALES[tt].SIMP[s].COMP[c].find('..')])
222 + indice2 = (int)(TEMPORALES[tt].SIMP[s].COMP[c][TEMPORALES[tt].SIMP[s].COMP[c].find('..')+2:TEMPORALES[tt].SIMP[s].COMP[c].find(']')])
223 + replacer = MEMORIAB[0].TEXT[indice1:indice2]
224 + #print "replaced: " + replaced
225 + #print "replacer: " + replacer
226 + TEMPORALES[tt].TEXT = TEMPORALES[tt].TEXT.replace(replaced,replacer)
227 + tt = tt + 1
228 + #copiar simplificaciones de memoria temporal a MEMORIAB
229 + indtempamem = 0
230 + while indtempamem < len(TEMPORALES) :
231 + MEMORIAB.append(Sentence())
232 + MEMORIAB[-1]=copy.deepcopy(TEMPORALES[indtempamem])
233 + MEMORIAB[-1].FLAG = True
234 + #print "Copio a memoria: " + MEMORIAB[-1].TEXT
235 + indtempamem = indtempamem + 1
236 + elif 'member-collection' in MEMORIAB[t].SIMP[s].TYPE:
237 + #print "Aplico regla member-collection"
238 + TEMPORALES = []
239 + c = len(MEMORIAB[t].SIMP[s].COMP)
240 + #print "Hay " + str(c) + " COMP en este SIMP"
241 + tt = 0
242 + while c > 0 :
243 + #print "entro al while de mem"
244 + c = c - 1
245 + TEMPORALES.append(Sentence())
246 + TEMPORALES[tt] = copy.deepcopy(MEMORIAB[t])
247 + replaced = MEMORIAB[0].SIMP[s].TEXT
248 + indice1 = (int)(TEMPORALES[tt].SIMP[s].COMP[c][TEMPORALES[tt].SIMP[s].COMP[c].find('[')+1:TEMPORALES[tt].SIMP[s].COMP[c].find('..')])
249 + indice2 = (int)(TEMPORALES[tt].SIMP[s].COMP[c][TEMPORALES[tt].SIMP[s].COMP[c].find('..')+2:TEMPORALES[tt].SIMP[s].COMP[c].find(']')])
250 + replacer = MEMORIAB[0].TEXT[indice1:indice2]
251 + #print "replaced: " + replaced
252 + #print "replacer: " + replacer
253 + TEMPORALES[tt].TEXT = TEMPORALES[tt].TEXT.replace(replaced,replacer)
254 + tt = tt + 1
255 + #copiar simplificaciones de memoria temporal a MEMORIAB
256 + indtempamem = 0
257 + while indtempamem < len(TEMPORALES) :
258 + MEMORIAB.append(Sentence())
259 + MEMORIAB[-1]=copy.deepcopy(TEMPORALES[indtempamem])
260 + MEMORIAB[-1].FLAG = True
261 + #print "Copio a memoria: " + MEMORIAB[-1].TEXT
262 + indtempamem = indtempamem + 1
263 + elif 'sentence coordination' in MEMORIAB[t].SIMP[s].TYPE:
264 + #print "Aplico regla Verb"
265 + TEMPORALES = []
266 + c = len(MEMORIAB[t].SIMP[s].COMP)
267 + #print "Hay " + str(c) + " COMP en este SIMP"
268 + tt = 0
269 + while c > 0 :
270 + c = c - 1
271 + if ( 'conjunct' in MEMORIAB[0].SIMP[s].COMP[c] ) and ( not ( 'conjunction' in MEMORIAB[0].SIMP[s].COMP[c] ) ) :
272 + TEMPORALES.append(Sentence())
273 + TEMPORALES[tt] = copy.deepcopy(MEMORIAB[t])
274 + #sustituye todo el contenido de TEMPORAL.r/TREE, por el contenido la oracion coordinada
275 + #replaced = MEMORIAB[0].SIMP[s].TEXT
276 + indice1 = (int)(TEMPORALES[tt].SIMP[s].COMP[c][TEMPORALES[tt].SIMP[s].COMP[c].find('[')+1:TEMPORALES[tt].SIMP[s].COMP[c].find('..')])
277 + indice2 = (int)(TEMPORALES[tt].SIMP[s].COMP[c][TEMPORALES[tt].SIMP[s].COMP[c].find('..')+2:TEMPORALES[tt].SIMP[s].COMP[c].find(']')])
278 + replacer = MEMORIAB[0].TEXT[indice1:indice2]
279 + #print replacer
280 + TEMPORALES[tt].TEXT = replacer
281 + ## si la oracion no termina en punto o !
282 + tt = tt + 1
283 + #copiar simplificaciones de memoria temporal a MEMORIAB
284 + indtempamem = 0
285 + while indtempamem < len(TEMPORALES) :
286 + MEMORIAB.append(Sentence())
287 + MEMORIAB[-1]=copy.deepcopy(TEMPORALES[indtempamem])
288 + MEMORIAB[-1].FLAG = True
289 + #print MEMORIAB[-1].TEXT
290 + indtempamem = indtempamem + 1
291 + elif 'full relative clause' in MEMORIAB[t].SIMP[s].TYPE:
292 + #print "Aplico regla RelCl"
293 + TEMPORALES = []
294 + c = 0
295 + tt = 0
296 + while c < 2 :
297 + if 'referred noun phrase' in MEMORIAB[0].SIMP[s].COMP[c] :
298 + TEMPORALES.append(Sentence())
299 + TEMPORALES[tt] = copy.deepcopy(MEMORIAB[t]) #ok
300 + if MEMORIAB[0].TEXT[MEMORIAB[0].TEXT.index(TEMPORALES[tt].SIMP[s].TEXT)+len(TEMPORALES[tt].SIMP[s].TEXT)-1] == ',':
301 + replaced = MEMORIAB[0].SIMP[s].TEXT + ',' #posible error, si es asi probar con ' ,'
302 + else:
303 + replaced = MEMORIAB[0].SIMP[s].TEXT
304 + indice1 = (int)(TEMPORALES[tt].SIMP[s].COMP[c][TEMPORALES[tt].SIMP[s].COMP[c].find('[')+1:TEMPORALES[tt].SIMP[s].COMP[c].find('..')])
305 + indice2 = (int)(TEMPORALES[tt].SIMP[s].COMP[c][TEMPORALES[tt].SIMP[s].COMP[c].find('..')+2:TEMPORALES[tt].SIMP[s].COMP[c].find(']')])
306 + replacer = MEMORIAB[0].TEXT[indice1:indice2]
307 + TEMPORALES[tt].TEXT = TEMPORALES[tt].TEXT.replace(replaced,replacer)
308 + indice3 = indice1
309 + indice4 = indice2
310 + if 'clause' in MEMORIAB[0].SIMP[s].COMP[c] :
311 + TEMPORALES.append(Sentence())
312 + TEMPORALES[tt] = copy.deepcopy(MEMORIAB[t]) #ok
313 + indice1 = (int)(TEMPORALES[tt].SIMP[s].COMP[c][TEMPORALES[tt].SIMP[s].COMP[c].find('[')+1:TEMPORALES[tt].SIMP[s].COMP[c].find('..')])
314 + indice2 = (int)(TEMPORALES[tt].SIMP[s].COMP[c][TEMPORALES[tt].SIMP[s].COMP[c].find('..')+2:TEMPORALES[tt].SIMP[s].COMP[c].find(']')])
315 + TEMPORALES[tt].TEXT = copy.deepcopy(MEMORIAB[0].TEXT[indice3:indice4]+' '+MEMORIAB[0].TEXT[indice1:indice2] ) ##
316 + cad3 = MEMORIAB[0].TEXT[indice1:indice2]
317 + cad4 = cad3.split()
318 + if (cad4[0]+'_WDT') in frase.POS:
319 + TEMPORALES[tt].TEXT = TEMPORALES[tt].TEXT.replace(' '+cad4[0],'')
320 + tt = tt + 1
321 + c = c + 1
322 + #copiar simplificaciones de memoria temporal a MEMORIAB
323 + indtempamem = 0
324 + while indtempamem < len(TEMPORALES) :
325 + MEMORIAB.append(Sentence())
326 + MEMORIAB[-1]=copy.deepcopy(TEMPORALES[indtempamem])
327 + MEMORIAB[-1].FLAG = True
328 + #print MEMORIAB[-1].TEXT
329 + indtempamem = indtempamem + 1
330 + elif 'reduced relative clause' in MEMORIAB[t].SIMP[s].TYPE:
331 + #print "Aplico regla RelCl"
332 + TEMPORALES = []
333 + c = 0
334 + tt = 0
335 + while c < 2 :
336 + if 'referred noun phrase' in MEMORIAB[0].SIMP[s].COMP[c] :
337 + TEMPORALES.append(Sentence())
338 + TEMPORALES[tt] = copy.deepcopy(MEMORIAB[t]) #ok
339 + replaced = MEMORIAB[0].SIMP[s].TEXT
340 + indice1 = (int)(TEMPORALES[tt].SIMP[s].COMP[c][TEMPORALES[tt].SIMP[s].COMP[c].find('[')+1:TEMPORALES[tt].SIMP[s].COMP[c].find('..')])
341 + indice2 = (int)(TEMPORALES[tt].SIMP[s].COMP[c][TEMPORALES[tt].SIMP[s].COMP[c].find('..')+2:TEMPORALES[tt].SIMP[s].COMP[c].find(']')])
342 + replacer = MEMORIAB[0].TEXT[indice1:indice2]
343 + #subj = MEMORIAB[0].TEXT[indice1:(indice2+1)]
344 + subj = MEMORIAB[0].TEXT[indice1:(indice2)]
345 + TEMPORALES[tt].TEXT = TEMPORALES[tt].TEXT.replace(replaced,replacer)
346 + if 'clause' in MEMORIAB[0].SIMP[s].COMP[c] :
347 + TEMPORALES.append(Sentence())
348 + TEMPORALES[tt] = copy.deepcopy(MEMORIAB[t]) #el referente debera estar antes que la clausula para tener orden correcto
349 + indice1 = (int)(TEMPORALES[tt].SIMP[s].COMP[c][TEMPORALES[tt].SIMP[s].COMP[c].find('[')+1:TEMPORALES[tt].SIMP[s].COMP[c].find('..')])
350 + indice2 = (int)(TEMPORALES[tt].SIMP[s].COMP[c][TEMPORALES[tt].SIMP[s].COMP[c].find('..')+2:TEMPORALES[tt].SIMP[s].COMP[c].find(']')])
351 + replacer = MEMORIAB[0].TEXT[indice1:indice2]
352 + TEMPORALES[tt].TEXT = subj + " _ " + replacer #en este punto para ingresar copula necesitas info de numero y tiempo
353 + tt = tt + 1
354 + c = c + 1
355 + #copiar simplificaciones de memoria temporal a MEMORIAB
356 + indtempamem = 0
357 + while indtempamem < len(TEMPORALES) :
358 + MEMORIAB.append(Sentence())
359 + MEMORIAB[-1]=copy.deepcopy(TEMPORALES[indtempamem])
360 + MEMORIAB[-1].FLAG = True
361 + #print MEMORIAB[-1].TEXT
362 + indtempamem = indtempamem + 1
363 + elif 'hypernymy' in MEMORIAB[t].SIMP[s].TYPE:
364 + print "**hypernymy detected**"
365 + #print "True to: " + MEMORIAB[t].TEXT
366 + MEMORIAB[t].FLAG = True
367 + else:
368 + print "Error: Unknown simplification construct detected."
369 + #print "True to: " + MEMORIAB[t].TEXT
370 + MEMORIAB[t].FLAG = True
371 + t = t + 1
372 + s = s + 1
373 +
374 + #CONDITIONS FOR WRITING THE SIMPLIFICATIONS TO TEXT FILES
375 + print "Sentence simplificated. New sentences generated:"
376 + for i in range(len(MEMORIAB)):
377 + #FLAG is reused here to mark the final sentences
378 + MEMORIAB[i].FLAG = True
379 + for j in range(len(MEMORIAB[0].SIMP)):
380 + #NOTE: if a new simplifiable construct is added, add it to this list as well:
381 + if ( ('member-collection' in MEMORIAB[0].SIMP[j].TYPE) or ('apposition' in MEMORIAB[0].SIMP[j].TYPE) or ('coordination' in MEMORIAB[0].SIMP[j].TYPE) or ('parenthesis' in MEMORIAB[0].SIMP[j].TYPE) or ('sentence coordination' in MEMORIAB[0].SIMP[j].TYPE) or ('full relative clause' in MEMORIAB[0].SIMP[j].TYPE) or ('reduced relative clause' in MEMORIAB[0].SIMP[j].TYPE) ) and (MEMORIAB[0].SIMP[j].TEXT in MEMORIAB[i].TEXT) :
382 + MEMORIAB[i].FLAG = False # sentence i still contains the text of a listed construct of MEMORIAB[0], so it is not marked as final
383 +
384 + ##fix the numbering of the output files (zero padding), e.g. 011
385 + arcsalnum = 0
386 + for i in range(len(MEMORIAB)):
387 + if MEMORIAB[i].FLAG == True:
388 + arcsalnum = arcsalnum + 1
389 + length = len(str(arcsalnum)) # digit count of the counter; used below as the zfill padding width
390 + #print('{:03d}'.format(arcsalnum)) # python >= 2.7 + python3
391 +# >>> n = '4'
392 +#>>> print n.zfill(3)
393 + arcsalnum = 0
394 + for i in range(len(MEMORIAB)):
395 + if MEMORIAB[i].FLAG == True:
396 + arcsalnum = arcsalnum + 1
397 + print MEMORIAB[i].TEXT#Output
398 + archSalNombre = sys.argv[2] # output file name is derived from command-line argument 2
399 + archSalNombre=archSalNombre[:-4] + "-" + (str(arcsalnum)).zfill(length) + '.alg' # drops the last 4 characters (presumably a 3-letter extension - verify) and appends "-<zero-padded counter>.alg"
400 + archivoSalida=open(archSalNombre,"w") # "w": any existing file with this name is truncated
401 + archivoSalida.write(MEMORIAB[i].TEXT+"\n")##
402 + archivoSalida.close()
403 + #WRITE OUTPUT FILE PATH TO INDEX (Arg 3)
404 + index_name = sys.argv[3]
405 + index = open(index_name, "a+") # append mode: earlier index entries are kept
406 + archSalNombreforIndex=archSalNombre + "\n"
407 + index.write(archSalNombreforIndex) # NOTE(review): indentation is not visible here - confirm whether this runs once per written sentence or once after the loop
408 + index.close()
409 +else:
410 + print frase.TEXT #----Output when there were no simplifiable constructs
411 + archSalNombre = sys.argv[2] # output file name is derived from command-line argument 2
412 + archSalNombre = archSalNombre[:-4] + ".alg" # drops the last 4 characters (presumably a 3-letter extension - verify) and appends ".alg"; no counter suffix in this branch
413 + archivoSalida = open(archSalNombre,"a+") # append mode, unlike the "w" mode used for the numbered files
414 + archivoSalida.write(frase.TEXT+"\n")##
415 + archivoSalida.close()
416 + #WRITE OUTPUT FILE PATH TO INDEX (Arg 3)
417 + index_name = sys.argv[3]
418 + index = open(index_name, "a+") # append mode: earlier index entries are kept
419 + archSalNombreforIndex=archSalNombre + "\n"
420 + index.write(archSalNombreforIndex)
421 + index.close()
422 +
423 +
424 +#END