Showing
126 changed files
with
2174 additions
and
0 deletions
isimp_v2/README
0 → 100644
1 | +How to run | ||
2 | + | ||
3 | +$ bash $ISIMP/simplify.sh -json [INPUT] [OUTPUT] | ||
4 | + | ||
5 | +You can place files anywhere and run the above command. The INPUT file should contain plain text. The script will simplify the text and output the simplification constructs (with location information) in JSON format. For other options, see the help message: | ||
6 | + | ||
7 | +$ bash $ISIMP/simplify.sh -help | ||
8 | +usage: Console [OPTIONS] [INPUT] [OUTPUT] | ||
9 | + Tag the POS, parse the sentences, and detect simplification | ||
10 | + constructs in the sentences. | ||
11 | + By default, assume the document is not tokenized and | ||
12 | + sentence-splited. Therefore, these two tasks will be done | ||
13 | + first. | ||
14 | + -help display this help and exit | ||
15 | + -json print file in JSON format. If not set, print file in plain | ||
16 | + text format | ||
17 | + -parse_only If set, parse the document only | ||
18 | + -tokenized set input tokenized. If not set, assume the document is not | ||
19 | + tokenized and ssplited. | ||
20 | + | ||
21 | + |
isimp_v2/bin/adapter/AdapterPattern.class
0 → 100644
No preview for this file type
No preview for this file type
isimp_v2/bin/adapter/OtherAdapter.class
0 → 100644
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
isimp_v2/bin/annotator/ISimpAnnotator.class
0 → 100644
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
isimp_v2/bin/detect/DetectionPattern.class
0 → 100644
No preview for this file type
No preview for this file type
isimp_v2/bin/detect/ISimp.class
0 → 100644
No preview for this file type
isimp_v2/bin/detect/ISimpBuilder.class
0 → 100644
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
isimp_v2/bin/extractor/ISimpExtractor.class
0 → 100644
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
isimp_v2/bin/main/BioCMain.class
0 → 100644
No preview for this file type
isimp_v2/bin/main/Console.class
0 → 100644
No preview for this file type
isimp_v2/bin/main/EvalSimp.class
0 → 100644
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
isimp_v2/bin/main/JsonAdapters.class
0 → 100644
No preview for this file type
isimp_v2/bin/main/SentencePrinter$1.class
0 → 100644
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
isimp_v2/bin/main/SentencePrinter.class
0 → 100644
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
isimp_v2/bin/test/ISimpTest.class
0 → 100644
No preview for this file type
isimp_v2/bin/test/JSonTest.class
0 → 100644
No preview for this file type
isimp_v2/bin/test/StanfordCoreNLPTest.class
0 → 100644
No preview for this file type
isimp_v2/bin/utils/DetailedLabelWriter.class
0 → 100644
No preview for this file type
isimp_v2/bin/utils/LabelWriter.class
0 → 100644
No preview for this file type
isimp_v2/bin/utils/PatternEnv.class
0 → 100644
No preview for this file type
isimp_v2/bin/utils/PtbUtils.class
0 → 100644
No preview for this file type
isimp_v2/bin/utils/PtbWriter$1.class
0 → 100644
No preview for this file type
isimp_v2/bin/utils/PtbWriter.class
0 → 100644
No preview for this file type
isimp_v2/bin/utils/Utils.class
0 → 100644
No preview for this file type
isimp_v2/lib/bioc.jar
0 → 100644
No preview for this file type
isimp_v2/lib/commons-cli-1.2.jar
0 → 100644
No preview for this file type
No preview for this file type
isimp_v2/lib/commons-io-2.4.jar
0 → 100644
No preview for this file type
isimp_v2/lib/commons-lang3-3.1.jar
0 → 100644
No preview for this file type
isimp_v2/lib/gson-2.2.4.jar
0 → 100644
No preview for this file type
isimp_v2/lib/junit-4.11.jar
0 → 100644
No preview for this file type
This file is too large to display.
isimp_v2/lib/stanford-corenlp-3.2.0.jar
0 → 100644
No preview for this file type
isimp_v2/rules/adapter.txt
0 → 100644
1 | +// (VP) (CC) (VP NP) --> (VP CC VP) NP | ||
2 | +tregex: VP=p <1 (VP=c1 <: VBN|VBZ|VBD|VBP|VBG=cc1) <2 CC|CONJP <3 (VP=v2 <1 ~cc1=cc2 <2 NP|S=others) <- =v2 | ||
3 | +operation: move others $- p | ||
4 | + | ||
5 | +// (VP) (CC) (VP NP) (...) --> (VP CC VP) NP (...) | ||
6 | +tregex: VP=p <1 (VP=c1 <: VBN|VBZ|VBD|VBP|VBG=cc1) <2 CC|CONJP=cc <3 (VP=v2 <1 ~cc1=cc2 <2 NP|S=others) !<- =v2 | ||
7 | +operation: move cc $- cc1, move cc2 $- cc, move c1 $+ others | ||
8 | + | ||
9 | +tregex: VP=p <1 (VBN|VBZ|VBD|VBP|VBG=cc1) <2 CC|CONJP <3 (~cc1=c2) <4 NP=others <- =others | ||
10 | +operation: move others $- p | ||
11 | + | ||
12 | +//tregex: NP=p <1 NNS=nns <2 /,/ <3 NN=n1 <4 CC=cc <5 NN=n2 | ||
13 | +//operation: adjoinF (NP @) n1, move cc $- n1, move n2 $- cc | ||
14 | + | ||
15 | +tregex: VP=p < (VBN|VBZ|VBD|VBP|VBG=cc1 $+ (CC|CONJP $+ (~cc1=c2)) !>1 =p) | ||
16 | +operation: move others $- p |
isimp_v2/rules/apposition-detect.txt
0 → 100644
1 | +// apposition | ||
2 | +// 33 | ||
3 | +tregex: NP=p <1 (@/N.*/=np1 !<< /,/ !<<, DT) <2 /,/ <3 (@/N.*/=np2 !<< /,/ <<, DT|CD) !< CC <- =np2 | ||
4 | +// 22 | ||
5 | +tregex: NP=p <1 (@/N.*/=np1 !<< /,/ !<<, DT) <2 /,/ <3 (@/N.*/=np2 !<< /,/ <<, DT) !< CC <- /,/ | ||
6 | +// 17 | ||
7 | +tregex: NP=p <1 (@/N.*/=np1 !<< /,/ <<, DT) <2 /,/ <3 (@/N.*/=np2 !<< /,/ !<< DT) !< CC <- =np2 | ||
8 | +// 21 | ||
9 | +tregex: NP=p <1 (@/N.*/=np1 !<< /,/ <<, DT) <2 /,/ <3 (@/N.*/=np2 !<< /,/ !<< DT) !< CC <- /,/ | ||
10 | +// 33 | ||
11 | +tregex: NP=p < (NP=np1 $++ (PP <1 (VBG < /including/) <2 NP=np2)) | ||
12 | +// 4 | ||
13 | +tregex: NP=p <1 (NP=np1 <1 CD) <2 /,/ <3 NP=np2 <4 /,/=end <- =end | ||
14 | +// 3 | ||
15 | +tregex: NP=p <1 (NP=np1 << CD <2 (PP <<, /of/)) <2 /,/ <3 NP=np2 | ||
16 | + | ||
17 | +// NP(DT ...) NP(NN) | ||
18 | +// 8 | ||
19 | +tregex: NP=p <1 (NP=np1 << DT) <2 (NP=np2 <: NN) | ||
20 | + | ||
21 | +// the NP , a NP | ||
22 | +tregex: NP=p <1 (NP=np1 <<, /the/) <2 /,/ <3 (NP=np2 <<, /^(a|an)_/) | ||
23 | + | ||
24 | +// DT ... NNS NP(no CC) | ||
25 | +// 4 | ||
26 | +tregex: NP=p <1 DT|JJ <-1 (NP=np2 !< CC !< /,/ $- NNS) | ||
27 | + | ||
28 | +// DT NP(... NNS) NP | ||
29 | +// 0 | ||
30 | +tregex: NP=p <1 DT|JJ <2 (NP <- NNS) <3 (NP=np2 !< CC !< /,/) <- =np2 | ||
31 | + | ||
32 | +// NP : NP | ||
33 | +// 16 | ||
34 | +tregex: NP=p <1 NP=np1 <2 /:/ <3 (NP=np2 !<< CC !<< /,/) <- =np2 | ||
35 | + | ||
36 | +// NNS , NN CC NN |
isimp_v2/rules/apposition.txt
0 → 100644
1 | +// apposition | ||
2 | +// 33 | ||
3 | +tregex: NP=p <1 (@/N.*/=np1 !<< /,/ !<<, DT) <2 /,/ <3 (@/N.*/=np2 !<< /,/ <<, DT|CD) !< CC <- =np2 | ||
4 | +// 22 | ||
5 | +tregex: NP=p <1 (@/N.*/=np1 !<< /,/ !<<, DT) <2 /,/ <3 (@/N.*/=np2 !<< /,/ <<, DT) !< CC <- /,/ | ||
6 | +// 17 | ||
7 | +tregex: NP=p <1 (@/N.*/=np1 !<< /,/ <<, DT) <2 /,/ <3 (@/N.*/=np2 !<< /,/ !<< DT) !< CC <- =np2 | ||
8 | +// 21 | ||
9 | +tregex: NP=p <1 (@/N.*/=np1 !<< /,/ <<, DT) <2 /,/ <3 (@/N.*/=np2 !<< /,/ !<< DT) !< CC <- /,/ | ||
10 | +// 33 | ||
11 | +tregex: NP=p < (NP=np1 $++ (PP <1 (VBG < /including/) <2 NP=np2)) | ||
12 | +// 4 | ||
13 | +tregex: NP=p <1 (NP=np1 <1 CD) <2 /,/ <3 NP=np2 <4 /,/=end <- =end | ||
14 | +// 3 | ||
15 | +tregex: NP=p <1 (NP=np1 << CD <2 (PP <<, /of/)) <2 /,/ <3 NP=np2 | ||
16 | +operation: replace p np1 | ||
17 | +operation: replace p np2 | ||
18 | + | ||
19 | +// NP(DT ...) NP(NN) | ||
20 | +// 8 | ||
21 | +tregex: NP=p <1 (NP=np1 << DT) <2 (NP=np2 <: NN) | ||
22 | +operation: replace p np1 | ||
23 | +operation: replace p np2 | ||
24 | + | ||
25 | +// the NP , a NP | ||
26 | +tregex: NP=p <1 (NP=np1 <<, /the/) <2 /,/ <3 (NP=np2 <<, /^(a|an)_/) | ||
27 | +operation: replace p np1 | ||
28 | +operation: replace p np2 | ||
29 | + | ||
30 | +// DT ... NNS NP(no CC) | ||
31 | +// 4 | ||
32 | +tregex: NP=p <1 DT|JJ <-1 (NP=np2 !< CC !< /,/ $- NNS) | ||
33 | +operation: replace p np2 | ||
34 | +operation: prune np2 | ||
35 | + | ||
36 | +// DT NP(... NNS) NP | ||
37 | +// 0 | ||
38 | +tregex: NP=p <1 DT|JJ <2 (NP <- NNS) <3 (NP=np2 !< CC !< /,/) <- =np2 | ||
39 | +operation: replace p np2 | ||
40 | +operation: prune np2 | ||
41 | + | ||
42 | +// NP : NP | ||
43 | +// 16 | ||
44 | +tregex: NP=p <1 NP=np1 <2 /:/ <3 (NP=np2 !<< CC !<< /,/) <- =np2 | ||
45 | +operation: replace p np1 | ||
46 | +operation: replace p np2 |
isimp_v2/rules/coordination-adapter.txt
0 → 100644
1 | +// vbn coordination | ||
2 | +tregex: VP=p < (VBN|VBZ|VBD|VBP|VBG|VB=cc1 $+ (CC|CONJP=cc $+ (~cc1=cc2)) !>1 =p) | ||
3 | +operation: adjoinF (VP @) cc1, move cc $- cc1, move cc2 $- cc | ||
4 | + | ||
5 | +tregex: VP=p < (VBN|VBZ|VBD|VBP|VBG|VB=cc1 $+ (CC|CONJP=cc $+ (~cc1=cc2 $+ __))) | ||
6 | +operation: adjoinF (VP @) cc1, move cc $- cc1, move cc2 $- cc | ||
7 | + | ||
8 | +// np , (np ... cc np) | ||
9 | +tregex: NP=np <: NN|NNS=n1 $+ (/,/=comma $+ (NP <1 (NN|NNS=n2 $++ (CC [$+ NN|NNS | $+ (NP <: NN|NNS)] )))) | ||
10 | +operation: move comma $+ n2, move n1 $+ comma, prune np | ||
11 | + | ||
12 | +// NN , NN | ||
13 | +tregex: NP=np <1 NN=nn1 <2 /,/ <3 NN=nn2 <- =nn2 | ||
14 | +operation: excise np np |
isimp_v2/rules/coordination-detect.txt
0 → 100644
1 | +// mrna | ||
2 | +tregex: NP=p < ((NP=c1 <<- /mRNA/=mrna) $+ (CC=cc $+ (NP=c2 <<, /protein|surface/=protein))) | ||
3 | +tregex: NP=p < ((NN=c2 <<: /surface/=protein) $+ (CC=cc $+ (NN=c1 <<: /mRNA/=mrna))) | ||
4 | +tregex: NP=p < ((NP=c1 <<- /protein|surface/=protein) $+ (CC=cc $+ (NP=c2 <<, /mRNA/=mrna))) | ||
5 | + | ||
6 | +// NP cc NP (mRNA) | ||
7 | +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 CC <3 (NP=c2 <1 __=nn <2 (NN <<: /mRNA/)) | ||
8 | + | ||
9 | +// NP , NP cc NP (mRNA) | ||
10 | +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 /,/ <3 (NP=c2 <: NN=nn2) <4 CC <5 (NP=c3 <1 __=nn <2 (NN <<: /mRNA/)) | ||
11 | + | ||
12 | +// NP , NP , cc NP (mRNA) | ||
13 | +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 /,/ <3 (NP=c2 <: NN=nn2) <4 /,/ <5 CC <6 (NP=c3 <1 __=nn <2 (NN <<: /mRNA/)) | ||
14 | + | ||
15 | +// NP , NP , NP cc NP (mRNA) | ||
16 | +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 /,/ <3 (NP=c2 <: NN=nn2) <4 /,/ <5 (NP=c3 <: NN=nn3) <6 CC <7 (NP=c4 <1 __=nn <2 (NN <<: /mRNA/)) | ||
17 | + | ||
18 | +// NP , NP , NP , cc NP (mRNA) | ||
19 | +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 /,/ <3 (NP=c2 <: NN=nn2) <4 /,/ <5 (NP=c3 <: NN=nn3) <6 /,/ <7 CC <8 (NP=c4 <1 __=nn <2 (NN <<: /mRNA/)) | ||
20 | + | ||
21 | +tregex: NP=n0 <1 (NP=c1 <1 (NN=n2) <- =n2) <2 (CC=n3) <3 (NP=c2 <1 (NN=n5) <- =n5) <- =c2 | ||
22 | + | ||
23 | +tregex: ADJP=n0 <1 (ADJP=c1 <1 (JJ=n2) <- =n2) <2 (CC=n3) <3 (ADJP=c2 <1 (JJ=n5) <- =n5) <- =c2 | ||
24 | + | ||
25 | +// 162 | ||
26 | +tregex: NP=n0 <1 (CC=n1) <2 (NP=c1 <1 (NN=n3) <- =n3) <3 (CC=n4) <4 (NP=c2 <1 (NN=n6) <- =n6) <- =c2 | ||
27 | + | ||
28 | +// 90 | ||
29 | +tregex: ADJP=n0 <1 (CC=n1) <2 (ADJP=c1 <1 (JJ=n3) <- =n3) <3 (CC=n4) <4 (ADJP=c2 <1 (JJ=n6) <- =n6) <- =c2 | ||
30 | + | ||
31 | +// 78 | ||
32 | +tregex: NP=n0 <1 (NP=c1 <1 (NN=n2) <- =n2) <2 (/,/=n3) <3 (NP=c2 <1 (NN=n5) <- =n5) <4 (/,/=n6) <5 (CC=n7) <6 (NP=c3 <1 (NN=n9) <- =n9) <- =c3 | ||
33 | + | ||
34 | +// CD CC CD | ||
35 | +tregex: QP=n0 <1 (CD=c1) <2 (CC=n2) <3 (CD=c2) <- =c2 | ||
36 | + | ||
37 | +// PRP CC PRP | ||
38 | +tregex: NP=n0 <1 (PRP=c1) <2 (CC=n2) <3 (PRP=c2) <- =c2 | ||
39 | + | ||
40 | +// DT NP:NN CC NP:NN | ||
41 | +tregex: NP=n0 <1 (DT=n1) <2 (NP=c1 <1 (NN=n3) <- =n3) <3 (CC=n4) <4 (NP=c2 <1 (NN=n6) <- =n6) <- =c2 | ||
42 | + | ||
43 | +// NP:NNS CC NP:NNS | ||
44 | +tregex: NP=n0 <1 (NP=c1 <1 (NNS=n2) <- =n2) <2 (CC=n3) <3 (NP=c2 <1 (NNS=n5) <- =n5) <- =c2 | ||
45 | + | ||
46 | +// NP:NN CC NP:NN | ||
47 | +tregex: NP=n0 <1 (NP=c1 <1 (NN=n2) <- =n2) <2 (CC=n3) <3 (NP=c2 <1 (NN=n5) <- =n5) <4 (NP=n6 <1 (NNS=n7) <- =n7) <- =n6 | ||
48 | + | ||
49 | +// 30 | ||
50 | +tregex: NP=n0 <1 (NP=n1 <1 (NN=n2) <- =n2) <2 (CC=n3) <3 (NP=n4 <1 (NN=c2) <2 (NNS=n6) <- =n6) <- =n4 | ||
51 | + | ||
52 | +// 29 | ||
53 | +tregex: NP=n0 <1 (NP=c1 <1 (NN=n2) <- =n2) <2 (CC=n3) <3 (NP=c2 <1 (JJ=n5) <2 (NNS=n6) <- =n6) <- =c2 | ||
54 | + | ||
55 | +// 28 | ||
56 | +tregex: NP=n0 <1 (NP=n1 <1 (NN=n2) <2 (NN=n3) <3 (NN=c1) <- =c1) <2 (CC=n5) <3 (NP=c2 <1 (NN=n7) <- =n7) <- =c2 | ||
57 | + | ||
58 | +// 27 | ||
59 | +tregex: NP=n0 <1 (NP=c1 <1 (NN=n2) <- =n2) <2 (CC=n3) <3 (NP=c2 <1 (NN=n5) <- =n5) <4 (NP=n6 <1 (NN=n7) <- =n7) <- =n6 | ||
60 | + | ||
61 | +// 25 | ||
62 | +tregex: NP=n0 <1 (NP=c1 <1 (CD=n2) <- =n2) <2 (CC=n3) <3 (NP=c2 <1 (CD=n5) <- =n5) <- =c2 | ||
63 | + | ||
64 | +// 24 | ||
65 | +tregex: NP=n0 <1 (NP=c1 <1 (NN=n2) <- =n2) <2 (/,/=n3) <3 (NP=c2 <1 (NN=n5) <- =n5) <4 (/,/=n6) <5 (NP=c3 <1 (NN=n8) <- =n8) <6 (/,/=n9) <7 (CC=n10) <8 (NP=c4 <1 (NN=n12) <- =n12) <- =c4 | ||
66 | + | ||
67 | +// 23 | ||
68 | +tregex: ADVP=n0 <1 (ADVP=c1 <1 (RB=n2) <- =n2) <2 (CC=n3) <3 (ADVP=c2 <1 (RB=n5) <- =n5) <- =c2 | ||
69 | + | ||
70 | +// 20 | ||
71 | +tregex: ADJP=n0 <1 (ADJP=c1 <1 (NN=n2) <- =n2) <2 (CC=n3) <3 (ADJP=c2 <1 (JJ=n5) <- =n5) <- =c2 | ||
72 | + | ||
73 | +// 20 | ||
74 | +tregex: NP=n0 <1 (NP=c1 <1 (NN=n2) <- =n2) <2 (/,/=n3) <3 (NP=c2 <1 (NN=n5) <- =n5) <4 (CC=n6) <5 (NP=c3 <1 (NN=n8) <- =n8) <- =c3 | ||
75 | + | ||
76 | +// 21 | ||
77 | +tregex: NP=n0 <1 (NP=c1 <1 (JJ=n2) <2 (NN=n3) <- =n3) <2 (CC=n4) <3 (NP=c2 <1 (JJ=n6) <2 (NN=n7) <- =n7) <- =c2 | ||
78 | + | ||
79 | +// 17 | ||
80 | +tregex: NP=n0 <1 (NN=c1) <2 (CC=n2) <3 (NN=c2) <4 (NNS=n4) <- =n4 | ||
81 | + | ||
82 | +// 17 | ||
83 | +tregex: NP=n0 <1 (NP=c1 <1 (NNP=n2) <- =n2) <2 (CC=n3) <3 (NP=c2 <1 (NNP=n5) <- =n5) <- =c2 | ||
84 | + | ||
85 | +// 16 | ||
86 | +tregex: NP=n0 <1 (NP=c1 <1 (JJ=n2) <2 (NN=n3) <3 (NN=n4) <- =n4) <2 (CC=n5) <3 (NP=c2 <1 (NN=n7) <- =n7) <- =c2 | ||
87 | + | ||
88 | +// 16 | ||
89 | +tregex: NP=n0 <1 (NP=c1 <1 (NN=n2) <2 (NN=n3) <3 (NN=n4) <- =n4) <2 (CC=n5) <3 (NP=c2 <1 (NN=n7) <2 (NN=n8) <3 (NN=n9) <- =n9) <- =c2 | ||
90 | + | ||
91 | +// 15 | ||
92 | +tregex: NP=n0 <1 (NNS=n1) <2 (CD=c1) <3 (CC=n3) <4 (CD=c2) <- =c2 | ||
93 | + | ||
94 | +// 13 | ||
95 | +tregex: NP=n0 <1 (NP=c1 <1 (NNS=n2) <- =n2) <2 (CC=n3) <3 (NP=c2 <1 (NN=n5) <2 (NNS=n6) <- =n6) <- =c2 | ||
96 | + | ||
97 | +// 13 | ||
98 | +tregex: NP=n0 <1 (NP=n1 <1 (NN=n2) <2 (NNS=c1) <- =c1) <2 (CC=n4) <3 (NP=n5 <1 (NNS=c2) <- =c2) <- =n5 | ||
99 | + | ||
100 | +// 12 | ||
101 | +tregex: NP=n0 <1 (CC=n1) <2 (NP=n2 <1 (JJ=n3) <2 (NN=c1) <- =c1) <3 (CC=n5) <4 (NP=n6 <1 (NN=c2) <- =c2) <- =n6 | ||
102 | + | ||
103 | +// 11 | ||
104 | +tregex: NP=n0 <1 (NP=c1 <1 (NN=n2) <- =n2) <2 (/,/=n3) <3 (NP=c2 <1 (NN=n5) <- =n5) <4 (/,/=n6) <5 (CC=n7) <6 (NP=n8 <1 (NN=c3) <2 (NNS=n10) <- =n10) <- =n8 | ||
105 | + | ||
106 | +// 11 | ||
107 | +tregex: NP=n0 <1 (NP=c1 <1 (JJ=n2) <2 (NNS=n3) <- =n3) <2 (CC=n4) <3 (NP=c2 <1 (JJ=n6) <2 (NNS=n7) <- =n7) <- =c2 | ||
108 | + | ||
109 | +// 11 | ||
110 | +tregex: NP=n0 <1 (NP=c1 <1 (NN=n2) <2 (NNS=n3) <- =n3) <2 (CC=n4) <3 (NP=c2 <1 (NN=n6) <2 (NNS=n7) <- =n7) <- =c2 | ||
111 | + | ||
112 | +// 11 | ||
113 | +tregex: NP=n0 <1 (NP=n1 <1 (JJ=n2) <2 (NNS=c1) <- =c1) <2 (CC=n4) <3 (NP=c2 <1 (NNS=n6) <- =n6) <- =c2 | ||
114 | + | ||
115 | +// 10 | ||
116 | +tregex: NP=n0 <1 (JJ=n1) <2 (NP=c1 <1 (NN=n3) <- =n3) <3 (CC=n4) <4 (NP=c2 <1 (NN=n6) <- =n6) <- =c2 | ||
117 | + | ||
118 | +// 10 | ||
119 | +tregex: NP=n0 <1 (CC=n1) <2 (NP=c1 <1 (NN=n3) <- =n3) <3 (CC=n4) <4 (NP=n5 <1 (JJ=c2) <2 (NNS=n7) <- =n7) <- =n5 | ||
120 | + | ||
121 | +// 10 | ||
122 | +tregex: NP=n0 <1 (DT=n1) <2 (NN=c1) <3 (CC=n3) <4 (NN=c2) <5 (NNS=n5) <- =n5 | ||
123 | + | ||
124 | +// 10 | ||
125 | +tregex: NP=n0 <1 (CC=n1) <2 (NP=c1 <1 (NN=n3) <- =n3) <3 (CC=n4) <4 (NP=n5 <1 (NN=c2) <2 (NNS=n7) <- =n7) <- =n5 | ||
126 | + | ||
127 | +// 10 | ||
128 | +tregex: NP=n0 <1 (DT=n1) <2 (NP=c1 <1 (NN=n3) <- =n3) <3 (CC=n4) <4 (NP=c2 <1 (NN=n6) <- =n6) <5 (NP=n7 <1 (NNS=n8) <- =n8) <- =n7 | ||
129 | + | ||
130 | +// 10 | ||
131 | +tregex: NP=n0 <1 (NP=c1 <1 (NN=n2) <- =n2) <2 (CC=n3) <3 (NP=c2 <1 (NN=n5) <- =n5) <4 (NP=n6 <1 (NN=n7) <2 (NNS=n8) <- =n8) <- =n6 | ||
132 | + | ||
133 | +// 2 (DT NN CC NN) | ||
134 | +tregex: NP=p <1 /CC|DT|JJ|VBN|PRP\$/ <2 (/NP|NN/=c1) <3 CC|CONJP=cc <4 (/NP|NN/=c2) <- =c2 | ||
135 | +tregex: NP=p <1 /CC|DT/ <2 JJ=c1 <3 CC=cc <4 JJ=c2 | ||
136 | +// 2 NN CC NN NNS: 17 | ||
137 | +tregex: NP=p <1 /NP|NN/=c1 <2 CC|CONJP=cc <3 (/NP|NN/=c2 !< PP) <4 __=end <- =end | ||
138 | +tregex: NP=p <1 CC <2 /NP|NN/=c1 <3 CC|CONJP=cc <4 (/NP|NN/=c2 !< PP) <5 __=end <- =end | ||
139 | +// 2 NN CC NN NN NN | ||
140 | +tregex: NP=p <1 /NP|NN/=c1 <2 CC|CONJP=cc <3 /NP|NN/=c2 <4 /NP|NN/ <5 /NP|NN/=end <- =end | ||
141 | +// 2 VBN NN CC NN NN: 12 | ||
142 | +tregex: NP=p <1 /CC|DT|JJ|VBN|PRP\$/ <2 /NP|NN/=c1 <3 CC|CONJP=cc <4 /NP|NN/=c2 <5 /NP|NN/ <6 /NP|NN/=end <- =end | ||
143 | +// 2 DT NP CC NP NP | ||
144 | +tregex: NP=p <1 /CC|DT|JJ|VBN|PRP\$/ <2 (/NP|NN/=c1) <3 CC|CONJP=cc <4 (/NP|NN/=c2) <5 (/NP|NN/=end) <- =end | ||
145 | +// 2 NP CC NP NP: 92 | ||
146 | +tregex: NP <1 (NP=c1 <: NN) <2 CC|CONJP=cc <3 (NP=c2 <: NN) <4 NP=end <- =end | ||
147 | +// 2 NN CD CC CD: 20 | ||
148 | +tregex: NP <2 (CD=c1) <3 CC|CONJP=cc <4 (CD=c2) <- =c2 | ||
149 | + | ||
150 | +// 2 CC DT NP CC NP NP | ||
151 | +tregex: NP <1 (CC=cc1) <2 (DT) <3 (NP=c1 <: (NN)) <4 (CC=cc2) <5 (NP=c2 <: (NN)) <6 (NP=end <: (NNS)) <- =end | ||
152 | + | ||
153 | +// 2 (JJ NN) CC NN | ||
154 | +tregex: NP=p <1 (NP <1 JJ=jj <2 /NN|NNS/=c1 <- =c1) <2 CC|CONJP <3 (NP=c2 <: /NN|NNS/=nn2) <- =c2 | ||
155 | +// 2 NN NN CC NN: 129 | ||
156 | +tregex: NP=p <1 (NP <1 NN=jj <2 /NN|NNS/=c1 <- =c1) <2 CC|CONJP <3 (NP=c2 <: /NN|NNS/=nn2) <- =c2 | ||
157 | + | ||
158 | +// 2 | ||
159 | +tregex: VP=p <1 (VP=c1 <: VBN|VBZ|VBD|VBP|VBG=cc1) <2 CC|CONJP <3 (VP=v2 <1 ~cc1=c2 <2 __=others) <- =v2 | ||
160 | +tregex: VP=p <1 (VP=c1 <: VBN|VBZ|VBD|VBP|VBG=cc1) <2 CC|CONJP <3 (VP=v2 <1 ~cc1=c2) | ||
161 | + | ||
162 | +// 2 NP CC NP PP | ||
163 | +tregex: NP=p <1 (NP=c1 !< PP !<< /secretion/) <2 CC|CONJP=cc <3 (NP <1 NP=c2 <2 PP=pp <- =pp) | ||
164 | +tregex: NP=p <1 (NP=c1 !< PP !<< /secretion/) <2 /,/ <3 CC|CONJP=cc <4 (NP <1 NP=c2 <2 PP=pp <- =pp) | ||
165 | +tregex: NP=p <1 CC <2 (NP=c1 !< PP !<< /secretion/) <3 CC|CONJP=cc <4 (NP <1 NP=c2 <2 PP=pp <- =pp) | ||
166 | + | ||
167 | +// 2 NN CC (NN NNS): 151 | ||
168 | +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 CC|CONJP <3 (NP=n2 <1 NN=c2 <2 NN|NNS=nns) | ||
169 | +// 2 CC NN CC (NN NNS): 30 | ||
170 | +tregex: NP=p <1 (CC) <2 (NP=c1 <: /NN|NNS/=nn1) <3 CC|CONJP <4 (NP=n2 <1 /JJ|VBN|NN/=c2 <2 /NN|NNS/=nns) <- =n2 | ||
171 | + | ||
172 | +// 2 JJ CC NP(NN NNS) | ||
173 | +tregex: NP=p <1 (NP <1 (ADJP=c1 <: (JJ=nn1))) <2 (CC) <3 (NP=n2 <1 (NN=c2) <2 (NNS=nns) <- =nns) <- =n2 | ||
174 | + | ||
175 | +// 2 NP(NN NN NN) CC NP(NN) | ||
176 | +tregex: NP=p <1 (NP <1 (NN|DT|JJ) <2 (NN|JJ) <3 (NN=c1) <- =c1) <2 (CC) <3 (NP=c2 <: (NN=nn2)) <- =c2 | ||
177 | + | ||
178 | +// 2 NP(JJ NN NN) CC NP(NN NN) | ||
179 | +tregex: NP=p <1 (NP=c1 <1 (JJ) <2 (NN=nn3) <3 (NN=nn1) <- =nn1) <2 (CC) <3 (NP=c2 <1 (NN=nn4) <2 (NN=nn2) <- =nn2) <- =c2 | ||
180 | + | ||
181 | +// 2 ADVP PP , CC PP | ||
182 | +tregex: PP=p <1 ADVP <2 PP=c1 <3 /,/ <4 CC=cc <5 PP=c2 <- =c2 | ||
183 | +tregex: NP=p <1 (NP=c1 !<< DT) <2 /,/ <3 (NP=c2 !<< DT) <- =c2 | ||
184 | +tregex: VP=p <1 (VP=c1 << NP) <2 CC|CONJP <3 (VP=c2 <- NP) | ||
185 | +tregex: NP=p <1 (NP=c1 <: NN) <2 CC|CONJP <3 (NP=c2 <1 DT) | ||
186 | +//UCP=p <1 (ADJP=c1 <: (JJ)) <2 (CC) <3 (NP=c2 <: (NN)) <- =c2 | ||
187 | +tregex: NP=p <1 (CC) <2 (NP=c1 <1 (DT) <2 (NN)) <3 (CC) <4 (NP=c2 <1 (DT) <2 (NN) <3 (NN)) <- =c2 | ||
188 | + | ||
189 | +// 2 NP CC NP NP(NNS) | ||
190 | +tregex: NP=p <1 NP=c1 <2 CC|CONJP=cc <3 NP=c2 <4 (NP <- NNS) | ||
191 | + | ||
192 | +// 2 NP(NN) CC NP(JJ JJ NN) | ||
193 | +tregex: NP=p <1 (NP=c1 <: (NN=nn1)) <2 (CC) <3 (NP=c2 <1 (JJ=jj1) <2 (JJ=jj2) <3 (NN=nn2) <- =nn2) <- =c2 | ||
194 | + | ||
195 | +// 2 NP(ADJP) CC NP(NN NNS) | ||
196 | +tregex: NP=p <1 (NP=c1 <: (ADJP <: (JJ=nn1))) <2 (CC) <3 (NP=n2 <1 (NN=c2) <2 (NNS=nns) <- =nns) <- =n2 | ||
197 | + | ||
198 | +// 2 NN NN NP CC NP(NN) | ||
199 | +tregex: NP=p <1 (NN) <2 (NN) <3 (NP=c1 <1 (NN)) <4 (CC=cc) <5 (NP=c2 <: (NN)) <- =c2 | ||
200 | + | ||
201 | +// 2 NN CC NN NN NN NN | ||
202 | +tregex: NP=p <1 (NN=c1) <2 (CC=cc) <3 (NN=c2) <4 (NN) <5 (NN) <6 (NN) | ||
203 | + | ||
204 | +// 3 | ||
205 | +tregex: VP=p <1 (VP=c1 <: VBN|VBZ|VBD|VBP|VBG=cc1) <2 /,/ <3 (VP=c2 <: ~cc1=cc2) <4 /,/ <5 CC|CONJP <6 (VP=v2 <1 ~cc1=c3 <2 __=others) <- =v2 | ||
206 | + | ||
207 | +//3 NN , NN , CC (NN NN): 19 | ||
208 | +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 /,/ <3 (NP=c2 <: NN=nn2) <4 CC|CONJP <5 (NP <1 NN=c3 <2 NNS=nn4 <- =nn4) | ||
209 | +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 /,/ <3 (NP=c2 <: NN=nn2) <4 /,/ <5 CC|CONJP <6 (NP <1 NN=c3 <2 NNS=nn4 <- =nn4) | ||
210 | +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 /,/ <3 (NP=c2 <: NN=nn2) <4 CC|CONJP <5 (NP <1 NN=c3 <2 (NN=nn4 <<: /mRNA/) <- =nn4) | ||
211 | +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 /,/ <3 (NP=c2 <: NN=nn2) <4 /,/ <5 CC|CONJP <6 (NP <1 NN=c3 <2 (NN=nn4 <<: /mRNA/) <- =nn4) | ||
212 | + | ||
213 | +//3 NP , NP , CC NP NP: 5 | ||
214 | +tregex: NP <1 (NP=c1 <: NN) <2 /,/=comma1 <3 (NP=c2 <: NN) <4 /,/=comma2 <5 CC|CONJP=cc <6 (NP=c3 <: NN) <7 NP=end <- =end | ||
215 | + | ||
216 | +//3 NN , NN , CC NN NN NNS: 2 | ||
217 | +tregex: NP <1 NN=c1 <2 /,/=comma1 <3 NN=c2 <4 /,/=comma2 <5 CC|CONJP=cc <6 NN=c3 <7 NN <8 /NNS/=end <- =end | ||
218 | + | ||
219 | +// 3 NP(JJ NN) , NP , CC NP | ||
220 | +tregex: NP=p <1 (NP=c1 <1 (JJ) <2 (NN=nn1)) <2 (/,/) <3 (NP <1 (NN=nn2)) <4 (/,/) <5 (CC) <6 (NP=c3 <1 (NN=nn3)) <- =c3 | ||
221 | + | ||
222 | +// 4 | ||
223 | +tregex: NP=p <1 /NP|NN/=c1 <2 /,/=comma1 <3 /NP|NN/=c2 <4 /,/=comma2 <5 /NP|NN/=c3 <6 CC|CONJP=cc <7 /NP|NN/=c4 <8 /NP|NN/=end <- =end | ||
224 | + | ||
225 | +// 4 | ||
226 | +tregex: NP=p <1 (NP=c1 <1 (NN) <2 (NN)) <2 (/,/) <3 (NP=c2 <1 (NN)) <4 (/,/) <5 (NP=c3 <1 (NN)) <6 (/,/) <7 (CC) <8 (NP=c4 <1 (NN)) <- =c4 | ||
227 | + | ||
228 | +//4 NN , NN , NN , CC NN NN: 4 | ||
229 | +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 /,/ <3 (NP=c2 <: NN=nn2) <4 /,/ <5 (NP=c3 <: NN=nn3) <6 CC|CONJP <7 (NP <1 NN=c4 <2 NNS=nn5 <- =nn5) | ||
230 | +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 /,/ <3 (NP=c2 <: NN=nn2) <4 /,/ <5 (NP=c3 <: NN=nn3) <6 /,/ <7 CC|CONJP <8 (NP <1 NN=c4 <2 NNS=nn5 <- =nn5) | ||
231 | +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 /,/ <3 (NP=c2 <: NN=nn2) <4 /,/ <5 (NP=c3 <: NN=nn3) <6 CC|CONJP <7 (NP <1 NN=c4 <2 (NN=nn5 <<: /mRNA/) <- =nn5) | ||
232 | +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 /,/ <3 (NP=c2 <: NN=nn2) <4 /,/ <5 (NP=c3 <: NN=nn3) <6 /,/ <7 CC|CONJP <8 (NP <1 NN=c4 <2 (NN=nn5 <<: /mRNA/) <- =nn5) | ||
233 | + | ||
234 | +tregex: (NP=n6 <1 (NP=c1) <2 (/,/=n9) <3 (NP=c2) <4 (/,/=n12) <5 (NP=c3) <6 (/,/=n15) <7 (NP=c4) <8 (/,/=n18) <9 (NP=c5) <10 (/,/=n21) <11 (NP=c6) <- =c6) | ||
235 | + | ||
236 | +tregex: (NP=n6 <1 (NP=c1) <2 (/,/=n9) <3 (NP=c2) <4 (/,/=n12) <5 (NP=c3) <6 (/,/=n15) <7 (NP=c4) <8 (/,/=n18) <9 (NP=c5) <- =c5) | ||
237 | + | ||
238 | +tregex: (NP=n6 <1 (NP=c1) <2 (/,/=n9) <3 (NP=c2) <4 (/,/=n12) <5 (NP=c3) <6 (/,/=n15) <7 (NP=c4) <- =c4) | ||
239 | + | ||
240 | +tregex: (NP=n6 <1 (NP=c1) <2 (/,/=n9) <3 (NP=c2) <4 (/,/=n12) <5 (NP=c3) <- =c3) | ||
241 | + | ||
242 | +//1 PP ; PP ; PP cc PP | ||
243 | +tregex: (PP=n6 <1 (PP=c1) <3 (PP=c2) <5 (PP=c3) <7 (PP=c4) <- =c4) | ||
244 | + |
isimp_v2/rules/coordination.txt
0 → 100644
1 | +// mrna | ||
2 | +tregex: NP=p < ((NP <<- /mRNA/=mrna) $+ (CC=cc $+ (NP <<, /protein|surface/=protein))) | ||
3 | +tregex: NP=p < ((NN <<: /surface/=protein) $+ (CC=cc $+ (NN <<: /mRNA/=mrna))) | ||
4 | +tregex: NP=p < ((NP <<- /protein|surface/=protein) $+ (CC=cc $+ (NP <<, /mRNA/=mrna))) | ||
5 | +operation: prune mrna cc | ||
6 | +operation: prune cc protein | ||
7 | + | ||
8 | +// NP cc NP (mRNA) | ||
9 | +tregex: NP=p <1 (NP <: NN=nn1) <2 CC <3 (NP=np2 <1 __=nn <2 (NN <<: /mRNA/)) | ||
10 | +operation: replace p np2 | ||
11 | +operation: replace nn nn1, replace p np2 | ||
12 | + | ||
13 | +// NP , NP cc NP (mRNA) | ||
14 | +tregex: NP=p <1 (NP <: NN=nn1) <2 /,/ <3 (NP <: NN=nn2) <4 CC <5 (NP=np2 <1 __=nn <2 (NN <<: /mRNA/)) | ||
15 | +operation: replace p np2 | ||
16 | +operation: replace nn nn1, replace p np2 | ||
17 | +operation: replace nn nn2, replace p np2 | ||
18 | + | ||
19 | +// NP , NP , cc NP (mRNA) | ||
20 | +tregex: NP=p <1 (NP <: NN=nn1) <2 /,/ <3 (NP <: NN=nn2) <4 /,/ <5 CC <6 (NP=np2 <1 __=nn <2 (NN <<: /mRNA/)) | ||
21 | +operation: replace p np2 | ||
22 | +operation: replace nn nn1, replace p np2 | ||
23 | +operation: replace nn nn2, replace p np2 | ||
24 | + | ||
25 | +// NP , NP , NP cc NP (mRNA) | ||
26 | +tregex: NP=p <1 (NP <: NN=nn1) <2 /,/ <3 (NP <: NN=nn2) <4 /,/ <5 (NP <: NN=nn3) <6 CC <7 (NP=np2 <1 __=nn <2 (NN <<: /mRNA/)) | ||
27 | +operation: replace p np2 | ||
28 | +operation: replace nn nn1, replace p np2 | ||
29 | +operation: replace nn nn2, replace p np2 | ||
30 | +operation: replace nn nn3, replace p np2 | ||
31 | + | ||
32 | +// NP , NP , NP , cc NP (mRNA) | ||
33 | +tregex: NP=p <1 (NP <: NN=nn1) <2 /,/ <3 (NP <: NN=nn2) <4 /,/ <5 (NP <: NN=nn3) <6 /,/ <7 CC <8 (NP=np2 <1 __=nn <2 (NN <<: /mRNA/)) | ||
34 | +operation: replace p np2 | ||
35 | +operation: replace nn nn1, replace p np2 | ||
36 | +operation: replace nn nn2, replace p np2 | ||
37 | +operation: replace nn nn3, replace p np2 | ||
38 | + | ||
39 | +// 892 | ||
40 | +tregex: NP=n0 <1 (NP=n1 <1 (NN=n2) <- =n2) <2 (CC=n3) <3 (NP=n4 <1 (NN=n5) <- =n5) <- =n4 | ||
41 | +operation: replace n0 n1 | ||
42 | +operation: replace n0 n4 | ||
43 | + | ||
44 | +// 308 | ||
45 | +tregex: ADJP=n0 <1 (ADJP=n1 <1 (JJ=n2) <- =n2) <2 (CC=n3) <3 (ADJP=n4 <1 (JJ=n5) <- =n5) <- =n4 | ||
46 | +operation: replace n0 n1 | ||
47 | +operation: replace n0 n4 | ||
48 | + | ||
49 | +// 162 | ||
50 | +tregex: NP=n0 <1 (CC=n1) <2 (NP=n2 <1 (NN=n3) <- =n3) <3 (CC=n4) <4 (NP=n5 <1 (NN=n6) <- =n6) <- =n5 | ||
51 | +operation: replace n0 n2 | ||
52 | +operation: replace n0 n5 | ||
53 | + | ||
54 | +// 90 | ||
55 | +tregex: ADJP=n0 <1 (CC=n1) <2 (ADJP=n2 <1 (JJ=n3) <- =n3) <3 (CC=n4) <4 (ADJP=n5 <1 (JJ=n6) <- =n6) <- =n5 | ||
56 | +operation: replace n0 n2 | ||
57 | +operation: replace n0 n5 | ||
58 | + | ||
59 | +// 78 | ||
60 | +tregex: NP=n0 <1 (NP=n1 <1 (NN=n2) <- =n2) <2 (/,/=n3) <3 (NP=n4 <1 (NN=n5) <- =n5) <4 (/,/=n6) <5 (CC=n7) <6 (NP=n8 <1 (NN=n9) <- =n9) <- =n8 | ||
61 | +operation: replace n0 n1 | ||
62 | +operation: replace n0 n4 | ||
63 | +operation: replace n0 n8 | ||
64 | + | ||
65 | +// 77 | ||
66 | +tregex: QP=n0 <1 (CD=n1) <2 (CC=n2) <3 (CD=n3) <- =n3 | ||
67 | +operation: replace n0 n1 | ||
68 | +operation: replace n0 n3 | ||
69 | + | ||
70 | +// 68 | ||
71 | +tregex: NP=n0 <1 (DT=n1) <2 (NP=n2 <1 (NN=n3) <- =n3) <3 (CC=n4) <4 (NP=n5 <1 (NN=n6) <- =n6) <- =n5 | ||
72 | +operation: replace n0 n2 | ||
73 | +operation: replace n0 n5 | ||
74 | + | ||
75 | +// 47 | ||
76 | +tregex: NP=n0 <1 (NP=n1 <1 (NNS=n2) <- =n2) <2 (CC=n3) <3 (NP=n4 <1 (NNS=n5) <- =n5) <- =n4 | ||
77 | +operation: replace n0 n1 | ||
78 | +operation: replace n0 n4 | ||
79 | + | ||
80 | +// 38 | ||
81 | +tregex: NP=n0 <1 (NP=n1 <1 (NN=n2) <- =n2) <2 (CC=n3) <3 (NP=n4 <1 (NN=n5) <- =n5) <4 (NP=n6 <1 (NNS=n7) <- =n7) <- =n6 | ||
82 | +operation: prune n1 n3 | ||
83 | +operation: prune n3 n4 | ||
84 | + | ||
85 | +// 30 | ||
86 | +tregex: NP=n0 <1 (NP=n1 <1 (NN=n2) <- =n2) <2 (CC=n3) <3 (NP=n4 <1 (NN=n5) <2 (NNS=n6) <- =n6) <- =n4 | ||
87 | +operation: replace n0 n4 | ||
88 | +operation: replace n5 n2, replace n0 n4 | ||
89 | + | ||
90 | +// 29 | ||
91 | +tregex: NP=n0 <1 (NP=n1 <1 (NN=n2) <- =n2) <2 (CC=n3) <3 (NP=n4 <1 (JJ=n5) <2 (NNS=n6) <- =n6) <- =n4 | ||
92 | +operation: replace n0 n4 | ||
93 | +operation: replace n5 n2, replace n0 n4 | ||
94 | + | ||
95 | +// 28 | ||
96 | +tregex: NP=n0 <1 (NP=n1 <1 (NN=n2) <2 (NN=n3) <3 (NN=n4) <- =n4) <2 (CC=n5) <3 (NP=n6 <1 (NN=n7) <- =n7) <- =n6 | ||
97 | +operation: replace n0 n1 | ||
98 | +operation: replace n4 n7, replace n0 n1 | ||
99 | + | ||
100 | +// 27 | ||
101 | +tregex: NP=n0 <1 (NP=n1 <1 (NN=n2) <- =n2) <2 (CC=n3) <3 (NP=n4 <1 (NN=n5) <- =n5) <4 (NP=n6 <1 (NN=n7) <- =n7) <- =n6 | ||
102 | +operation: prune n1 n3 | ||
103 | +operation: prune n3 n4 | ||
104 | + | ||
105 | +// 25 | ||
106 | +tregex: NP=n0 <1 (NP=n1 <1 (CD=n2) <- =n2) <2 (CC=n3) <3 (NP=n4 <1 (CD=n5) <- =n5) <- =n4 | ||
107 | +operation: replace n0 n1 | ||
108 | +operation: replace n0 n4 | ||
109 | + | ||
110 | +// 24 | ||
111 | +tregex: NP=n0 <1 (NP=n1 <1 (NN=n2) <- =n2) <2 (/,/=n3) <3 (NP=n4 <1 (NN=n5) <- =n5) <4 (/,/=n6) <5 (NP=n7 <1 (NN=n8) <- =n8) <6 (/,/=n9) <7 (CC=n10) <8 (NP=n11 <1 (NN=n12) <- =n12) <- =n11 | ||
112 | +operation: replace n0 n1 | ||
113 | +operation: replace n0 n4 | ||
114 | +operation: replace n0 n7 | ||
115 | +operation: replace n0 n11 | ||
116 | + | ||
117 | +// 23 | ||
118 | +tregex: ADVP=n0 <1 (ADVP=n1 <1 (RB=n2) <- =n2) <2 (CC=n3) <3 (ADVP=n4 <1 (RB=n5) <- =n5) <- =n4 | ||
119 | +operation: replace n0 n1 | ||
120 | +operation: replace n0 n4 | ||
121 | + | ||
122 | +// 20 | ||
123 | +tregex: ADJP=n0 <1 (ADJP=n1 <1 (NN=n2) <- =n2) <2 (CC=n3) <3 (ADJP=n4 <1 (JJ=n5) <- =n5) <- =n4 | ||
124 | +operation: replace n0 n1 | ||
125 | +operation: replace n0 n4 | ||
126 | + | ||
127 | +// 20 | ||
128 | +tregex: NP=n0 <1 (NP=n1 <1 (NN=n2) <- =n2) <2 (/,/=n3) <3 (NP=n4 <1 (NN=n5) <- =n5) <4 (CC=n6) <5 (NP=n7 <1 (NN=n8) <- =n8) <- =n7 | ||
129 | +operation: replace n0 n1 | ||
130 | +operation: replace n0 n4 | ||
131 | +operation: replace n0 n7 | ||
132 | + | ||
133 | +// 21 | ||
134 | +tregex: NP=n0 <1 (NP=n1 <1 (JJ=n2) <2 (NN=n3) <- =n3) <2 (CC=n4) <3 (NP=n5 <1 (JJ=n6) <2 (NN=n7) <- =n7) <- =n5 | ||
135 | +operation: replace n0 n1 | ||
136 | +operation: replace n0 n5 | ||
137 | + | ||
138 | +// 17 | ||
139 | +tregex: NP=n0 <1 (NN=n1) <2 (CC=n2) <3 (NN=n3) <4 (NNS=n4) <- =n4 | ||
140 | +operation: prune n1 n2 | ||
141 | +operation: prune n2 n3 | ||
142 | + | ||
143 | +// 17 | ||
144 | +tregex: NP=n0 <1 (NP=n1 <1 (NNP=n2) <- =n2) <2 (CC=n3) <3 (NP=n4 <1 (NNP=n5) <- =n5) <- =n4 | ||
145 | +operation: replace n0 n1 | ||
146 | +operation: replace n0 n4 | ||
147 | + | ||
148 | +// 16 | ||
149 | +tregex: NP=n0 <1 (NP=n1 <1 (JJ=n2) <2 (NN=n3) <3 (NN=n4) <- =n4) <2 (CC=n5) <3 (NP=n6 <1 (NN=n7) <- =n7) <- =n6 | ||
150 | +operation: replace n0 n1 | ||
151 | +operation: replace n4 n7, replace n0 n1 | ||
152 | + | ||
153 | +// 16 | ||
154 | +tregex: NP=n0 <1 (NP=n1 <1 (NN=n2) <2 (NN=n3) <3 (NN=n4) <- =n4) <2 (CC=n5) <3 (NP=n6 <1 (NN=n7) <2 (NN=n8) <3 (NN=n9) <- =n9) <- =n6 | ||
155 | +operation: replace n0 n1 | ||
156 | +operation: replace n0 n6 | ||
157 | + | ||
158 | +// 15 | ||
159 | +tregex: NP=n0 <1 (NNS=n1) <2 (CD=n2) <3 (CC=n3) <4 (CD=n4) <- =n4 | ||
160 | +operation: prune n2 n3 | ||
161 | +operation: prune n3 n4 | ||
162 | + | ||
163 | +// 13 | ||
164 | +tregex: NP=n0 <1 (NP=n1 <1 (NNS=n2) <- =n2) <2 (CC=n3) <3 (NP=n4 <1 (NN=n5) <2 (NNS=n6) <- =n6) <- =n4 | ||
165 | +operation: replace n0 n1 | ||
166 | +operation: replace n0 n4 | ||
167 | + | ||
168 | +// 13 | ||
169 | +tregex: NP=n0 <1 (NP=n1 <1 (NN=n2) <2 (NNS=n3) <- =n3) <2 (CC=n4) <3 (NP=n5 <1 (NNS=n6) <- =n6) <- =n5 | ||
170 | +operation: replace n0 n1 | ||
171 | +operation: replace n3 n6, replace n0 n1 | ||
172 | + | ||
173 | +// 12 | ||
174 | +tregex: NP=n0 <1 (CC=n1) <2 (NP=n2 <1 (JJ=n3) <2 (NN=n4) <- =n4) <3 (CC=n5) <4 (NP=n6 <1 (NN=n7) <- =n7) <- =n6 | ||
175 | +operation: replace n0 n2 | ||
176 | +operation: replace n4 n7, replace n0 n2 | ||
177 | + | ||
178 | +// 11 | ||
179 | +tregex: NP=n0 <1 (NP=n1 <1 (NN=n2) <- =n2) <2 (/,/=n3) <3 (NP=n4 <1 (NN=n5) <- =n5) <4 (/,/=n6) <5 (CC=n7) <6 (NP=n8 <1 (NN=n9) <2 (NNS=n10) <- =n10) <- =n8 | ||
180 | +operation: replace n0 n8 | ||
181 | +operation: replace n9 n2, replace n0 n8 | ||
182 | +operation: replace n9 n5, replace n0 n8 | ||
183 | + | ||
184 | +// 11 | ||
185 | +tregex: NP=n0 <1 (NP=n1 <1 (JJ=n2) <2 (NNS=n3) <- =n3) <2 (CC=n4) <3 (NP=n5 <1 (JJ=n6) <2 (NNS=n7) <- =n7) <- =n5 | ||
186 | +operation: replace n0 n1 | ||
187 | +operation: replace n0 n5 | ||
188 | + | ||
189 | +// 11 | ||
190 | +tregex: NP=n0 <1 (NP=n1 <1 (NN=n2) <2 (NNS=n3) <- =n3) <2 (CC=n4) <3 (NP=n5 <1 (NN=n6) <2 (NNS=n7) <- =n7) <- =n5 | ||
191 | +operation: replace n0 n1 | ||
192 | +operation: replace n0 n5 | ||
193 | + | ||
194 | +// 11 | ||
195 | +tregex: NP=n0 <1 (NP=n1 <1 (JJ=n2) <2 (NNS=n3) <- =n3) <2 (CC=n4) <3 (NP=n5 <1 (NNS=n6) <- =n6) <- =n5 | ||
196 | +operation: replace n0 n1 | ||
197 | +operation: replace n0 n5 | ||
198 | + | ||
199 | +// 10 | ||
200 | +tregex: NP=n0 <1 (JJ=n1) <2 (NP=n2 <1 (NN=n3) <- =n3) <3 (CC=n4) <4 (NP=n5 <1 (NN=n6) <- =n6) <- =n5 | ||
201 | +operation: prune n2 n4 | ||
202 | +operation: prune n4 n5 | ||
203 | + | ||
204 | +// 10 | ||
205 | +tregex: NP=n0 <1 (CC=n1) <2 (NP=n2 <1 (NN=n3) <- =n3) <3 (CC=n4) <4 (NP=n5 <1 (JJ=n6) <2 (NNS=n7) <- =n7) <- =n5 | ||
206 | +operation: replace n0 n5 | ||
207 | +operation: replace n6 n3, replace n0 n5 | ||
208 | + | ||
209 | +// 10 | ||
210 | +tregex: NP=n0 <1 (DT=n1) <2 (NN=n2) <3 (CC=n3) <4 (NN=n4) <5 (NNS=n5) <- =n5 | ||
211 | +operation: prune n2 n3 | ||
212 | +operation: prune n3 n4 | ||
213 | + | ||
214 | +// 10 | ||
215 | +tregex: NP=n0 <1 (CC=n1) <2 (NP=n2 <1 (NN=n3) <- =n3) <3 (CC=n4) <4 (NP=n5 <1 (NN=n6) <2 (NNS=n7) <- =n7) <- =n5 | ||
216 | +operation: replace n0 n5 | ||
217 | +operation: replace n6 n2, replace n0 n5 | ||
218 | + | ||
219 | +// 10 | ||
220 | +tregex: NP=n0 <1 (DT=n1) <2 (NP=n2 <1 (NN=n3) <- =n3) <3 (CC=n4) <4 (NP=n5 <1 (NN=n6) <- =n6) <5 (NP=n7 <1 (NNS=n8) <- =n8) <- =n7 | ||
221 | +operation: prune n2 n4 | ||
222 | +operation: prune n4 n5 | ||
223 | + | ||
224 | +// 10 | ||
225 | +tregex: NP=n0 <1 (NP=n1 <1 (NN=n2) <- =n2) <2 (CC=n3) <3 (NP=n4 <1 (NN=n5) <- =n5) <4 (NP=n6 <1 (NN=n7) <2 (NNS=n8) <- =n8) <- =n6 | ||
226 | +operation: prune n1 n3 | ||
227 | +operation: prune n3 n4 | ||
228 | + | ||
229 | +// 2 (DT NN CC NN) | ||
230 | +tregex: NP=p <1 /CC|DT|JJ|VBN|PRP\$/ <2 (/NP|NN/=c1) <3 CC|CONJP=cc <4 (/NP|NN/=c2) <- =c2 | ||
231 | +tregex: NP=p <1 /CC|DT/ <2 JJ=c1 <3 CC=cc <4 JJ=c2 | ||
232 | +// 2 NN CC NN NNS: 17 | ||
233 | +tregex: NP=p <1 /NP|NN/=c1 <2 CC|CONJP=cc <3 (/NP|NN/=c2 !< PP) <4 __=end <- =end | ||
234 | +tregex: NP=p <1 CC <2 /NP|NN/=c1 <3 CC|CONJP=cc <4 (/NP|NN/=c2 !< PP) <5 __=end <- =end | ||
235 | +// 2 NN CC NN NN NN | ||
236 | +tregex: NP=p <1 /NP|NN/=c1 <2 CC|CONJP=cc <3 /NP|NN/=c2 <4 /NP|NN/ <5 /NP|NN/=end <- =end | ||
237 | +// 2 VBN NN CC NN NN: 12 | ||
238 | +tregex: NP=p <1 /CC|DT|JJ|VBN|PRP\$/ <2 /NP|NN/=c1 <3 CC|CONJP=cc <4 /NP|NN/=c2 <5 /NP|NN/ <6 /NP|NN/=end <- =end | ||
239 | +// 2 DT NP CC NP NP | ||
240 | +tregex: NP=p <1 /CC|DT|JJ|VBN|PRP\$/ <2 (/NP|NN/=c1) <3 CC|CONJP=cc <4 (/NP|NN/=c2) <5 (/NP|NN/=end) <- =end | ||
241 | +// 2 NP CC NP NP: 92 | ||
242 | +tregex: NP <1 (NP=c1 <: NN) <2 CC|CONJP=cc <3 (NP=c2 <: NN) <4 NP=end <- =end | ||
243 | +// 2 NN CD CC CD: 20 | ||
244 | +tregex: NP <2 (CD=c1) <3 CC|CONJP=cc <4 (CD=c2) <- =c2 | ||
245 | +operation: prune cc c2 | ||
246 | +operation: prune cc c1 | ||
247 | + | ||
248 | +// 2 CC DT NP CC NP NP | ||
249 | +tregex: NP <1 (CC=cc1) <2 (DT) <3 (NP=c1 <: (NN)) <4 (CC=cc2) <5 (NP=c2 <: (NN)) <6 (NP=end <: (NNS)) <- =end | ||
250 | +operation: prune cc1 cc2 c2 | ||
251 | +operation: prune cc1 cc2 c1 | ||
252 | + | ||
253 | +// 2 (JJ NN) CC NN | ||
254 | +tregex: NP=p <1 (NP=c1 <1 JJ=jj <2 /NN|NNS/=end <- =end) <2 CC|CONJP <3 (NP=c2 <: /NN|NNS/=nn2) <- =c2 | ||
255 | +// 2 NN NN CC NN: 129 | ||
256 | +tregex: NP=p <1 (NP=c1 <1 NN=jj <2 /NN|NNS/=end <- =end) <2 CC|CONJP <3 (NP=c2 <: /NN|NNS/=nn2) <- =c2 | ||
257 | +operation: replace p c1 | ||
258 | +operation: move jj $+ nn2, replace p c2 | ||
259 | + | ||
260 | +// 2 VP(VB*) CC VP(~cc1 __) | ||
261 | +tregex: VP=p <1 (VP=c1 <: VBN|VBZ|VBD|VBP|VBG=cc1) <2 CC|CONJP <3 (VP=c2 <1 ~cc1 <2 __=others) <- =c2 | ||
262 | +operation: replace p c2 | ||
263 | +operation: move others $- cc1, replace p c1 | ||
264 | + | ||
265 | +// 2 NP CC NP PP | ||
266 | +tregex: NP=p <1 (NP=c1 !< PP !<< /secretion/) <2 CC|CONJP=cc <3 (NP=c2 <2 PP=pp <- =pp) | ||
267 | +tregex: NP=p <1 (NP=c1 !< PP !<< /secretion/) <2 /,/ <3 CC|CONJP=cc <4 (NP=c2 <2 PP=pp <- =pp) | ||
268 | +tregex: NP=p <1 CC <2 (NP=c1 !< PP !<< /secretion/) <3 CC|CONJP=cc <4 (NP=c2 <2 PP=pp <- =pp) | ||
269 | +operation: replace p c2 | ||
270 | +operation: prune cc, replace c2 pp | ||
271 | + | ||
272 | +// 2 NN CC (NN NNS): 151 | ||
273 | +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 CC|CONJP <3 (NP=c2 <1 NN=nn2 <2 NN|NNS=nns) | ||
274 | +// 2 CC NN CC (NN NNS): 30 | ||
275 | +tregex: NP=p <1 (CC) <2 (NP=c1 <: /NN|NNS/=nn1) <3 CC|CONJP <4 (NP=c2 <1 /JJ|VBN|NN/=nn2 <2 /NN|NNS/=nns) <- =c2 | ||
276 | +operation: replace p c2 | ||
277 | +operation: move nns $- nn1, replace p c1 | ||
278 | +operation: replace p c1 | ||
279 | +operation: replace p c2 | ||
280 | + | ||
281 | +// 2 JJ CC NP(NN NNS) | ||
282 | +tregex: NP=p <1 (NP <1 (ADJP <: (JJ=nn1))) <2 (CC) <3 (NP=c2 <1 (NN=nn2) <2 (NNS=nns) <- =nns) <- =c2 | ||
283 | +operation: replace p c2 | ||
284 | +operation: replace nn2 nn1, replace p c2 | ||
285 | + | ||
286 | +// 2 NP(NN NN NN) CC NP(NN) | ||
287 | +tregex: NP=p <1 (NP=c1 <1 (NN|DT|JJ) <2 (NN|JJ) <3 (NN=nn1) <- =nn1) <2 (CC) <3 (NP=c2 <: (NN=nn2)) <- =c2 | ||
288 | +operation: replace p c1 | ||
289 | +operation: replace nn1 nn2, replace p c1 | ||
290 | + | ||
291 | +// 2 NP(JJ NN NN) CC NP(NN NN) | ||
292 | +tregex: NP=p <1 (NP=c1 <1 (JJ) <2 (NN=nn3) <3 (NN=nn1) <- =nn1) <2 (CC) <3 (NP=c2 <1 (NN=nn4) <2 (NN=nn2) <- =nn2) <- =c2 | ||
293 | +operation: replace p c1 | ||
294 | +operation: replace nn1 nn2, replace nn3 nn4, replace p c1 | ||
295 | + | ||
296 | +// 2 ADVP PP , CC PP | ||
297 | +tregex: PP=p <1 ADVP <2 PP=c1 <3 /,/ <4 CC=cc <5 PP=c2 <- =c2 | ||
298 | +tregex: NP=p <1 (NP=c1 !<< DT) <2 /,/ <3 (NP=c2 !<< DT) <- =c2 | ||
299 | +tregex: VP=p <1 (VP=c1 << NP) <2 CC|CONJP <3 (VP=c2 <- NP) | ||
300 | +tregex: NP=p <1 (NP=c1 <: NN) <2 CC|CONJP <3 (NP=c2 <1 DT) | ||
301 | +//UCP=p <1 (ADJP=c1 <: (JJ)) <2 (CC) <3 (NP=c2 <: (NN)) <- =c2 | ||
302 | +tregex: NP=p <1 (CC) <2 (NP=c1 <1 (DT) <2 (NN)) <3 (CC) <4 (NP=c2 <1 (DT) <2 (NN) <3 (NN)) <- =c2 | ||
303 | +operation: replace p c1 | ||
304 | +operation: replace p c2 | ||
305 | + | ||
306 | +// 2 NP CC NP NP(NNS) | ||
307 | +tregex: NP=p <1 NP=c1 <2 CC|CONJP=cc <3 NP=c2 <4 (NP <- NNS) | ||
308 | +operation: prune c1 cc | ||
309 | +operation: prune cc c2 | ||
310 | + | ||
311 | +// 2 NP(NN) CC NP(JJ JJ NN) | ||
312 | +tregex: NP=p <1 (NP=c1 <: (NN=nn1)) <2 (CC) <3 (NP=c2 <1 (JJ=jj1) <2 (JJ=jj2) <3 (NN=nn2) <- =nn2) <- =c2 | ||
313 | +operation: replace p c2 | ||
314 | +operation: prune jj1, replace jj2 nn1, replace p c2 | ||
315 | + | ||
316 | +// 2 NP(ADJP) CC NP(NN NNS): nn1 names the JJ inside c1, jj1 names the leading NN of c2 | ||
317 | +tregex: NP=p <1 (NP=c1 <: (ADJP <: (JJ=nn1))) <2 (CC) <3 (NP=c2 <1 (NN=jj1) <2 (NNS=nns) <- =nns) <- =c2 | ||
318 | +operation: replace p c2 | ||
319 | +operation: replace jj1 nn1, replace p c2 | ||
320 | + | ||
321 | +// 2 NN NN NP CC NP(NN) | ||
322 | +tregex: NP=p <1 (NN) <2 (NN) <3 (NP=c1 <1 (NN)) <4 (CC=cc) <5 (NP=c2 <: (NN)) <- =c2 | ||
323 | +operation: prune c1 cc | ||
324 | +operation: prune cc c2 | ||
325 | + | ||
326 | +// 2 NN CC NN NN NN NN | ||
327 | +tregex: NP=p <1 (NN=c1) <2 (CC=cc) <3 (NN=c2) <4 (NN) <5 (NN) <6 (NN) | ||
328 | +operation: prune c1 cc | ||
329 | +operation: prune cc c2 | ||
330 | + | ||
331 | +// 2 NP(NN) CC NP(NN) NN NN | ||
332 | +tregex: NP=n0 <1 (NP=n1 <: NN) <2 CC=cc <3 (NP=n2 <: NN) <4 NN <5 NN | ||
333 | +operation: prune n1 cc | ||
334 | +operation: prune cc n2 | ||
335 | + | ||
336 | +// 2 NP(NN) CC NN NN(mRNA) NN | ||
337 | +tregex: NP=n0 <1 (NP=n1 <: NN) <2 CC=cc <3 NN=n2 <4 (NN <<, /mRNA/) <5 NN | ||
338 | +operation: prune n1 cc | ||
339 | +operation: prune cc n2 | ||
340 | + | ||
341 | +// 3 | ||
342 | +tregex: VP=p <1 (VP=c1 <: VBN|VBZ|VBD|VBP|VBG=cc1) <2 /,/ <3 (VP=c2 <: ~cc1=cc2) <4 /,/ <5 CC|CONJP <6 (VP=c3 <1 ~cc1 <2 __=others) <- =c3 | ||
343 | +operation: replace p c3 | ||
344 | +operation: move others $- cc1, replace p c1 | ||
345 | +operation: replace p c3 | ||
346 | +operation: move others $- cc2, replace p c2 | ||
347 | + | ||
348 | +//3 NN , NN , CC (NN NN): 19 | ||
349 | +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 /,/ <3 (NP=c2 <: NN=nn2) <4 CC|CONJP <5 (NP=c3 <1 NN=nn3 <2 NNS=nn4 <- =nn4) | ||
350 | +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 /,/ <3 (NP=c2 <: NN=nn2) <4 /,/ <5 CC|CONJP <6 (NP=c3 <1 NN=nn3 <2 NNS=nn4 <- =nn4) | ||
351 | +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 /,/ <3 (NP=c2 <: NN=nn2) <4 CC|CONJP <5 (NP=c3 <1 NN=nn3 <2 (NN=nn4 <<: /mRNA/) <- =nn4) | ||
352 | +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 /,/ <3 (NP=c2 <: NN=nn2) <4 /,/ <5 CC|CONJP <6 (NP=c3 <1 NN=nn3 <2 (NN=nn4 <<: /mRNA/) <- =nn4) | ||
353 | +operation: replace p c3 | ||
354 | +operation: move nn4 $- nn1, replace p c1 | ||
355 | +operation: move nn4 $- nn2, replace p c2 | ||
356 | + | ||
357 | +//3 NP , NP , CC NP NP: 5 | ||
358 | +tregex: NP <1 (NP=c1 <: NN) <2 /,/=comma1 <3 (NP=c2 <: NN) <4 /,/=comma2 <5 CC|CONJP=cc <6 (NP=c3 <: NN) <7 NP=end <- =end | ||
359 | +operation: prune c2 cc c3 comma1 comma2 | ||
360 | +operation: prune c1 cc c3 comma1 comma2 | ||
361 | +operation: prune c1 c2 cc comma1 comma2 | ||
362 | + | ||
363 | +//3 NN , NN , CC NN NN NNS: 2 | ||
364 | +tregex: NP <1 NN=c1 <2 /,/=comma1 <3 NN=c2 <4 /,/=comma2 <5 CC|CONJP=cc <6 NN=c3 <7 NN <8 /NNS/=end <- =end | ||
365 | +operation: prune c2 cc c3 comma1 comma2 | ||
366 | +operation: prune c1 cc c3 comma1 comma2 | ||
367 | +operation: prune c1 c2 cc comma1 comma2 | ||
368 | + | ||
369 | +// 3 NP(JJ NN) , NP , CC NP -- the last child must be the named node c3 (back-reference "=c3") | ||
370 | +tregex: NP=p <1 (NP=c1 <1 (JJ) <2 (NN=nn1)) <2 (/,/) <3 (NP <1 (NN=nn2)) <4 (/,/) <5 (CC) <6 (NP=c3 <1 (NN=nn3)) <- =c3 | ||
371 | +operation: replace p c1 | ||
372 | +operation: replace nn1 nn2, replace p c1 | ||
373 | +operation: replace nn1 nn3, replace p c1 | ||
374 | + | ||
375 | +// 4 NP|NN , NP|NN , NP|NN CC NP|NN NP|NN | ||
376 | +tregex: NP=p <1 /NP|NN/=c1 <2 /,/=comma1 <3 /NP|NN/=c2 <4 /,/=comma2 <5 /NP|NN/=c3 <6 CC|CONJP=cc <7 /NP|NN/=c4 <8 /NP|NN/=end <- =end | ||
377 | +operation: prune cc c1 c2 c3 comma1 comma2 | ||
378 | +operation: prune cc c2 c3 c4 comma1 comma2 | ||
379 | +operation: prune cc c3 c4 c1 comma1 comma2 | ||
380 | +operation: prune cc c4 c1 c2 comma1 comma2 | ||
381 | + | ||
382 | +// 4 NP(NN NN) , NP(NN) , NP(NN) , CC NP(NN) | ||
383 | +tregex: NP=p <1 (NP=c1 <1 (NN) <2 (NN)) <2 (/,/) <3 (NP=c2 <1 (NN)) <4 (/,/) <5 (NP=c3 <1 (NN)) <6 (/,/) <7 (CC) <8 (NP=c4 <1 (NN)) <- =c4 | ||
384 | +operation: replace p c1 | ||
385 | +operation: replace p c2 | ||
386 | +operation: replace p c3 | ||
387 | +operation: replace p c4 | ||
388 | + | ||
389 | +//4 NN , NN , NN , CC NN NN: 4 -- nn5 is moved next to the noun of each single-noun conjunct (nn1, nn2, nn3) | ||
390 | +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 /,/ <3 (NP=c2 <: NN=nn2) <4 /,/ <5 (NP=c3 <: NN=nn3) <6 CC|CONJP <7 (NP=c4 <1 NN=nn4 <2 NNS=nn5 <- =nn5) | ||
391 | +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 /,/ <3 (NP=c2 <: NN=nn2) <4 /,/ <5 (NP=c3 <: NN=nn3) <6 /,/ <7 CC|CONJP <8 (NP=c4 <1 NN=nn4 <2 NNS=nn5 <- =nn5) | ||
392 | +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 /,/ <3 (NP=c2 <: NN=nn2) <4 /,/ <5 (NP=c3 <: NN=nn3) <6 CC|CONJP <7 (NP=c4 <1 NN=nn4 <2 (NN=nn5 <<: /mRNA/) <- =nn5) | ||
393 | +tregex: NP=p <1 (NP=c1 <: NN=nn1) <2 /,/ <3 (NP=c2 <: NN=nn2) <4 /,/ <5 (NP=c3 <: NN=nn3) <6 /,/ <7 CC|CONJP <8 (NP=c4 <1 NN=nn4 <2 (NN=nn5 <<: /mRNA/) <- =nn5) | ||
394 | +operation: replace p c4 | ||
395 | +operation: move nn5 $- nn1, replace p c1 | ||
396 | +operation: move nn5 $- nn2, replace p c2 | ||
397 | +operation: move nn5 $- nn3, replace p c3 | ||
398 | + | ||
399 | +tregex: (NP=n6 <1 (NP=n7) <2 (/,/=n9) <3 (NP=n10) <4 (/,/=n12) <5 (NP=n13) <6 (/,/=n15) <7 (NP=n16) <8 (/,/=n18) <9 (NP=n19) <10 (/,/=n21) <11 (NP=n22) <- =n22) | ||
400 | +operation: replace n6 n7 | ||
401 | +operation: replace n6 n10 | ||
402 | +operation: replace n6 n13 | ||
403 | +operation: replace n6 n16 | ||
404 | +operation: replace n6 n19 | ||
405 | +operation: replace n6 n22 | ||
406 | + | ||
407 | +tregex: (NP=n6 <1 (NP=n7) <2 (/,/=n9) <3 (NP=n10) <4 (/,/=n12) <5 (NP=n13) <6 (/,/=n15) <7 (NP=n16) <8 (/,/=n18) <9 (NP=n19) <- =n19) | ||
408 | +operation: replace n6 n7 | ||
409 | +operation: replace n6 n10 | ||
410 | +operation: replace n6 n13 | ||
411 | +operation: replace n6 n16 | ||
412 | +operation: replace n6 n19 | ||
413 | + | ||
414 | +tregex: (NP=n6 <1 (NP=n7) <2 (/,/=n9) <3 (NP=n10) <4 (/,/=n12) <5 (NP=n13) <6 (/,/=n15) <7 (NP=n16) <- =n16) | ||
415 | +operation: replace n6 n7 | ||
416 | +operation: replace n6 n10 | ||
417 | +operation: replace n6 n13 | ||
418 | +operation: replace n6 n16 | ||
419 | + | ||
420 | +tregex: (NP=n6 <1 (NP=n7) <2 (/,/=n9) <3 (NP=n10) <4 (/,/=n12) <5 (NP=n13) <- =n13) | ||
421 | +operation: replace n6 n7 | ||
422 | +operation: replace n6 n10 | ||
423 | +operation: replace n6 n13 | ||
424 | + | ||
425 | +//1 PP ; PP ; PP cc PP | ||
426 | +tregex: (PP=n6 <1 (PP=n7) <3 (PP=n10) <5 (PP=n12) <7 (PP=n13) <- =n13) | ||
427 | +operation: replace n6 n7 | ||
428 | +operation: replace n6 n10 | ||
429 | +operation: replace n6 n12 | ||
430 | +operation: replace n6 n13 |
isimp_v2/rules/hypernymy-detect.txt
0 → 100644
1 | +// identified NP as NP | ||
2 | +tregex: __=p < (VP <1 (/^VB.?$/ <<: /^(identif)(y|ies|ied|ying)|(recogniz|determin)(e|es|ed|ing)?|(regard|consider|view|treat)(s|ed|ing)?|(think|thought)|(map)(s|ped|ping)?|(reveal)(s|ed|ing)?|(disclos)(e|es|ed|ing)?$/) <2 (NP=hype !< IN) <3 (PP <1 (IN <<: /as/) <2 NP=hypo)) | ||
3 | +tregex: __=p < (VP <1 (/^VB.?$/ <<: /^(identif)(y|ies|ied|ying)|(recogniz|determin)(e|es|ed|ing)?|(regard|consider|view|treat)(s|ed|ing)?|(think|thought)|(map)(s|ped|ping)?|(reveal)(s|ed|ing)?|(disclos)(e|es|ed|ing)?$/) <2 (NP <1 (NP=hype !< IN) <2 (PP <1 (IN <<: /as/) <2 NP=hypo))) | ||
4 | + | ||
5 | +// identified NP , as NP | ||
6 | +tregex: __=p < (VP <1 (/^VB.?$/ <<: /^(identif)(y|ies|ied|ying)|(recogniz|determin)(e|es|ed|ing)?|(regard|consider|view|treat)(s|ed|ing)?|(think|thought)|(map)(s|ped|ping)?|(reveal)(s|ed|ing)?|(disclos)(e|es|ed|ing)?$/) <2 (NP=hype !< IN) <3 /,/ <4 (PP <1 (IN <<: /as/) <2 NP=hypo)) | ||
7 | + | ||
8 | +// NP was identified as NP (second pattern: same, with an ADVP between the auxiliary and the participle VP) | ||
9 | +tregex: S=p < (NP=hype $+ (VP <1 (/^VB.*$/ <<: /is|was|were|are/) <2 (VP <1 (/^VB.?$/ <<: /^identified|recognized|regarded|thought|mapped|revealed|known|disclosed$/) <2 (PP <1 (IN <<: /as/) <2 NP=hypo)))) | ||
10 | +tregex: S=p < (NP=hype $+ (VP <1 (/^VB.*$/ <<: /is|was|were|are/) <2 ADVP <3 (VP <1 (/^VB.?$/ <<: /^identified|recognized|regarded|thought|mapped|revealed|known|disclosed$/) <2 (PP <1 (IN <<: /as/) <2 NP=hypo)))) | ||
11 | + | ||
12 | +// NP has been identified as NP | ||
13 | +tregex: S=p < (NP=hype $+ (VP <1 (/^VB.*$/ <<: /has|had|have/) <2 (VP <1 (/^VB.*$/ <<: /been/) <2 (VP <1 (/^VB.?$/ <<: /^identified|recognized|regarded|thought|mapped|revealed|known|disclosed$/) <2 (PP <1 (IN <<: /as/) <2 NP=hypo))))) | ||
14 | +tregex: S=p < (NP=hype $+ (VP <1 (/^VB.*$/ <<: /has|had|have/) <2 ADVP <3 (VP <1 (/^VB.*$/ <<: /been/) <2 (VP <1 (/^VB.?$/ <<: /^identified|recognized|regarded|thought|mapped|revealed|known|disclosed$/) <2 (PP <1 (IN <<: /as/) <2 NP=hypo))))) | ||
15 | + | ||
16 | +// NP , termed NP | ||
17 | +tregex: NP=p <1 (NP=hype) <2 /,/ <3 (VP <1 (/^VBN$/ <<: /^termed$/) <2 NP=hypo) | ||
18 | +tregex: VP=p <1 (VP <<- NP=hype) <2 /,/ <3 (VP <1 (/^VBN$/ <<: /^termed$/) <2 NP=hypo) | ||
19 | + | ||
20 | +// NP , known as NP | ||
21 | +tregex: NP=p <1 (NP=hype) <2 /,/ <3 (VP <1 (/^VBN$/ <<: /^known$/) <2 (PP <1 (IN << /as/) <2 NP=hypo)) | ||
22 | +tregex: NP=p <1 (NP=hype) <2 PP <3 /,/ <4 (VP <1 (/^VBN$/ <<: /^known$/) <2 (PP <1 (IN << /as/) <2 NP=hypo)) | ||
23 | + | ||
24 | +// NP is a NP -- the leftmost word of the hyponym NP must be exactly "a" or "an" (alternation grouped so both anchors apply) | ||
25 | +tregex: S=p <1 NP=hype <2 (VP <1 (/^VB|VBZ|VBP$/ <<: /is|are/) <2 (NP=hypo <<, /^(a|an)$/)) | ||
26 | +tregex: S=p <1 NP=hype <2 (VP <-1 (VP <1 (/^VB|VBZ|VBP$/ <<: /is|are|be|been/) <2 (NP=hypo <<, /^(a|an)$/))) | ||
27 | + | ||
28 | +// NP act|serves as NP | ||
29 | +tregex: S=p < (NP=hype $+ (VP <1 (/^VB|VBZ|VBP|VBD$/ <<: /(act|serve)(s|d|ed)?/) <2 (PP <1 (IN << /as/) <2 NP=hypo))) | ||
30 | +tregex: S=p < (NP=hype $+ (VP <1 (/^VB|VBZ|VBP|VBD$/ <<: /(act|serve)(s|d|ed)?/) <2 (SBAR <1 (IN << /as/) <2 S=hypo))) | ||
31 | +tregex: S=p < (NP=hype $+ (VP <1 MD <2 (VP <1 (/^VB|VBZ|VBP|VBD$/ <<: /(act|serve)/) <2 (PP <1 (IN << /as/) <2 NP=hypo)))) | ||
32 | + | ||
33 | +// identification of NP as NP | ||
34 | +tregex: NP=p <1 (NP <<- /(i|I)dentification/) <2 (PP <1 (IN << /of/) <2 (NP <1 NP=hype <2 (PP <1 (IN <<: /as/) <2 NP=hypo))) | ||
35 | +tregex: __=p < (__ <<- (NP <1 (NP <<- /(i|I)dentification/) <2 (PP <1 (IN << /of/) <2 NP=hype)) $+ (PP <1 (IN <<: /as/) <2 NP=hypo)) | ||
36 | + |
isimp_v2/rules/member-collection-detect.txt
0 → 100644
1 | +// NP such as NP | ||
2 | +tregex: __=p < (NP=tr $+ (PP <1 (JJ <: /such/) <2 (IN <: /as/) <3 NP=arg)) | ||
3 | +tregex: __=p < ((PP <1 IN <-1 NP=tr) $+ (PP <1 (JJ <: /such/) <2 (IN <: /as/) <3 NP=arg)) | ||
4 | + | ||
5 | +// NP , such as NP | ||
6 | +tregex: __=p < (NP=tr $+ (/,/ $+ (PP <1 (JJ <: /such/) <2 (IN <: /as/) <3 NP=arg))) | ||
7 | +tregex: __=p < (NP=tr $+ (/,/ $+ (ADJP <1 (JJ <: /such/) <2 (PP <1 (IN <: /as/) <2 NP=arg)))) | ||
8 | +tregex: __=p < (NP=tr $+ (/,/ $+ (ADJP <1 (JJ <: /such/) <2 (PP <1 (IN <: /as/) <2 NP=arg)))) | ||
9 | +tregex: __=p < (NP=tr $+ (/,/ $+ (CONJP <1 (JJ <: /such/) <2 (IN <: /as/) $+ (NP=arg)))) | ||
10 | +tregex: __=p < ((PP <1 IN <-1 NP=tr) $+ (/,/ $+ (PP <1 (JJ <: /such/) <2 (IN <: /as/) <3 NP=arg))) | ||
11 | +tregex: __=p < ((VP <-1 (PP <-1 NP=tr)) $+ (/,/ $+ (PP <1 (JJ <: /such/) <2 (IN <: /as/) <3 NP=arg))) | ||
12 | + | ||
13 | +// NP , including NP | ||
14 | +tregex: __=p < (NP=tr $+ (/,/ $+ (PP <<, /including/ <2 NP=arg))) | ||
15 | +tregex: __=p < ((VP <-1 (PP <-1 NP=tr)) $+ (/,/ $+ (PP <<, /including/ <2 NP=arg))) | ||
16 | +tregex: __=p < ((VP <-1 NP=tr) $+ (/,/ $+ (PP <<, /including/ <2 NP=arg))) | ||
17 | +tregex: __=p < ((VP <-1 NP=tr) $+ (/,/ $+ (VP <<, /including/ <2 NP=arg))) | ||
18 | +tregex: __=p < ((PP <1 IN <-1 NP=tr) $+ (/,/ $+ (PP <<, /including/ <2 NP=arg))) | ||
19 | + | ||
20 | +// NP including NP | ||
21 | +tregex: __=p < (NP=tr $+ (PP <<, /including/ <2 NP=arg)) | ||
22 | +tregex: __=p < (NP=tr $+ (VP <<, /including/ <2 NP=arg)) | ||
23 | +tregex: __=p < ((PP <1 IN <-1 NP=tr) $+ (PP <<, /including/ <2 NP=arg)) | ||
24 | +tregex: __=p < (NP=tr $+ (/:/ $+ (S <1 (VP <<, /including/ <2 NP=arg)))) | ||
25 | + | ||
26 | +// NP which includes NP | ||
27 | +tregex: __=p < (NP=tr $+ (SBAR <<, /which|that/ <2 (S <1 (VP <<, /include/ <2 NP=arg)))) | ||
28 | + | ||
29 | +// NP , which includes NP | ||
30 | +tregex: __=p < (NP=tr $+ (/,/ $+ (SBAR <<, /which/ <2 (S <1 (VP <<, /include/ <2 NP=arg))))) | ||
31 | +tregex: __=p < ((PP <1 IN <-1 NP=tr) $+ (/,/ $+ (SBAR <<, /which/ <2 (S <1 (VP <<, /include/ <2 NP=arg))))) | ||
32 | + | ||
33 | +// classes of NP | ||
34 | +tregex: NP=p <1 (NP=tr <<- /class|variety/ !< PP) <2 (PP <<, /of/ <2 (NP=arg !< PP)) | ||
35 | +tregex: NP=p <1 (NP=tr <<- /class|variety/ !< PP) <2 (PP <<, /of/ <2 (NP <1 NP=arg < PP)) |
isimp_v2/rules/others.txt
0 → 100644
isimp_v2/rules/parenthesis-adapter.txt
0 → 100644
1 | +// (NP -LRB- __ -RRB-) --> (PRN -LRB- __ -RRB-) | ||
2 | +tregex: NP=np <1 -LRB- <3 -RRB-=rrb <- =rrb | ||
3 | +operation: relabel np PRN | ||
4 | + | ||
5 | +// LRB is not the first child of the parent | ||
6 | +tregex: -LRB-=lrb $+ (__=xx $+ -RRB-=rrb) !>1 __ | ||
7 | +operation: adjoinF (PRN @) lrb, move xx $- lrb, move rrb $- xx | ||
8 | + | ||
9 | +// PRN is not the last child of the parent | ||
10 | +tregex: PRN=prn !>- __ $- NP=np | ||
11 | +operation: adjoinF (NP @) np, move prn $- np | ||
12 | + | ||
13 | +// If the ref NP has PP, find the right most NP | ||
14 | +tregex: PRN=prn $- (NP < PP <<- (NP=np !< PP)) | ||
15 | +operation: adjoinF (NP @) np, move prn $- np | ||
16 | + | ||
17 | +// If the ref is VP, find the right most NP | ||
18 | +tregex: PRN=prn $- (VP <<- (NP=np !< PP)) | ||
19 | +operation: adjoinF (NP @) np, move prn $- np | ||
20 | + |
isimp_v2/rules/parenthesis-detect.txt
0 → 100644
isimp_v2/rules/parenthesis.txt
0 → 100644
1 | +tregex: NP < (NN=n1 $+ (-LRB-=lrb $+ (NN=n2 $+ (NN=n3 $+ -RRB-=rrb)))) | ||
2 | +operation: prune lrb n2 n3 rrb | ||
3 | +operation: prune n1 lrb rrb | ||
4 | + | ||
5 | +tregex: NP=n0 <1 (NP=n1 !< PP) <2 (PRN=n3 <1 (-LRB-=n4) <2 (NP|QP|ADJP=n5) <3 (-RRB-=n7) <- =n7) <- =n3 | ||
6 | +operation: replace n0 n1 | ||
7 | +operation: replace n0 n5 | ||
8 | + | ||
9 | +tregex: ADJP=n0 <1 (JJ=n1) <2 (PRN=n2 <1 (-LRB-=n3) <2 (NP=n4) <3 (-RRB-=n6) <- =n6) <- =n2 | ||
10 | +operation: replace n2 n4 | ||
11 | + | ||
12 | +tregex: S=n0 <1 (PRN=n1 <1 (-LRB-=n2) <2 (NP=n3) <3 (-RRB-=n13) <- =n13) <- =n1 | ||
13 | +operation: prune n1 | ||
14 | + | ||
15 | +tregex: S=n0 <1 (-LRB-=n1) <2 (NP=n2) <3 (VP=n13) <4 (-RRB-=n25) <- =n25 | ||
16 | +operation: prune n0 | ||
17 | +operation: new n0 | ||
18 | + | ||
19 | +tregex: NP=n0 <1 (NN=n1) <2 (-LRB-=n2) <3 (CD=n3) <4 (-RRB-=n4) <- =n4 | ||
20 | +operation: prune n2 n4 | ||
21 | +operation: prune n2 n3 n4 | ||
22 | + | ||
23 | +tregex: __ < (@/N.*|JJ|CD/=n0 $+ (-LRB-=n1 $+ (__=n2 $+ -RRB-=n3))) | ||
24 | +operation: prune n1 n2 n3 | ||
25 | +operation: prune n0 n1 n3 | ||
26 | + | ||
27 | +tregex: NP=n0 <1 (LST=n1 <1 (-LRB-=n2) <2 (LS=n3) <3 (-RRB-=n4) <- =n4) | ||
28 | +operation: prune n1 | ||
29 | + | ||
30 | +tregex: VP=n0 <1 (VBN=n1) <2 (PRN=n2 <1 (-LRB-=n3) <2 (NP=n4) <3 (-RRB-=n6) <- =n6) <- =n2 | ||
31 | +operation: replace n2 n4 | ||
32 | + | ||
33 | +tregex: VP=n0 <1 (VBN=n1) <2 (ADVP=n2 <1 (RB=n3) <- =n3) <3 (PRN=n4 <1 (-LRB-=n5) <2 (NP=n6) <3 (-RRB-=n8) <- =n8) <- =n4 | ||
34 | +operation: replace n2 n6, prune n4 | ||
35 | + | ||
36 | +tregex: NP=n0 <1 (NP=n1 <1 (NP=n2) <2 (PP=n5 <1 (IN=n6) <2 (NP=n7) <- =n7) <- =n5) <2 (PRN=n11 <1 (-LRB-=n12) <2 (NP=n13) <3 (-RRB-=n15) <- =n15) <- =n11 | ||
37 | +operation: replace n0 n1 | ||
38 | +operation: replace n0 n13 | ||
39 | +operation: replace n7 n13, prune n11 | ||
40 | + | ||
41 | +tregex: __ < (ADJP|JJ=n0 $+ (PRN=n3 <1 (-LRB-=n4) <2 (__=n5) <3 (-RRB-=n7) <- =n7)) | ||
42 | +operation: replace n3 n5 | ||
43 | + | ||
44 | +tregex: __ < (NP|NN=n0 $+ (PRN=n3 <1 (-LRB-=n4) <2 (__=n5) <3 (-RRB-=n7) <- =n7)) | ||
45 | +operation: replace n0 n5 | ||
46 | +operation: prune n3 | ||
47 | + | ||
48 | +tregex: NP=n0 <1 (NP=n1) <2 (PRN=n5 <1 (-LRB-=n6) <2 (NP=n7) <3 (/:/=n9) <4 (NP=n10) <5 (/,/=n13) <6 (NP=n14) <7 (-RRB-=n17) <- =n17) <- =n5 | ||
49 | +operation: replace n0 n1 | ||
50 | +operation: replace n0 n7 | ||
51 | +operation: replace n0 n10 | ||
52 | +operation: replace n0 n14 | ||
53 | + | ||
54 | +tregex: NP=n0 <1 (NP=n1) <2 (PRN=n4 <1 (-LRB-=n5) <2 (NP=n6) <3 (/,/=n9) <4 (NP=n10) <5 (-RRB-=n13) <- =n13) <- =n4 | ||
55 | +operation: replace n0 n1 | ||
56 | +operation: replace n0 n6 | ||
57 | +operation: replace n0 n10 | ||
58 | + | ||
59 | +tregex: (PP=n0 <1 (IN=n1) <2 (NP=n2) <- =n2) $+ (PRN=n4 <1 (-LRB-=n5) <2 (NP=n6) <3 (-RRB-=n8) <- =n8) | ||
60 | +operation: prune n4 | ||
61 | +operation: replace n2 n6, prune n4 | ||
62 | + | ||
63 | +tregex: NP=n0 <1 (-LRB-=n1) <2 (NP=n2) <3 (-RRB-=n6) <- =n6 | ||
64 | +operation: replace n0 n2 | ||
65 | + | ||
66 | +#tregex: __=n0 <1 (-LRB-=n1) <2 (__=n2) <3 (-RRB-=n3) | ||
67 | +#operation: prune n1 n3 | ||
68 | + | ||
69 | +tregex: NP=n0 <1 (JJ=n1) <2 (-LRB-=n2) <3 (NN=n3) <4 (CD=n4) <5 (-RRB-=n5) <6 (JJ=n6) <7 (NN=n7) <8 (NN=n8) <- =n8 | ||
70 | +operation: prune n2 n5 | ||
71 | + | ||
72 | +tregex: ADJP=n0 <1 (ADJP=n1) <2 (PRN=n3 <1 (-LRB-=n4) <2 (NP=n5) <3 (/,/=n8) <4 (NP=n9) <5 (-RRB-=n12) <- =n12) <- =n3 | ||
73 | +operation: replace n3 n5 | ||
74 | +operation: replace n3 n9 | ||
75 | + | ||
76 | +tregex: NP=n0 <1 (NN=n1) <2 (NN=n2) <3 (JJ=n3) <4 (NN=n4) <5 (-LRB-=n5) <6 (NN=n6) <7 (NN=n7) <8 (-RRB-=n8) <9 (NN=n9) <- =n9 | ||
77 | +operation: prune n5 n8 n6 n7 | ||
78 | +operation: prune n4 n5 n8 | ||
79 | + | ||
80 | +tregex: NP=n0 <1 (NP=n1) <2 (NN=n7) <3 (-LRB-=n8) <4 (NN=n9) <5 (NN=n10) <6 (-RRB-=n11) <7 (NN=n12) <- =n12 | ||
81 | +operation: prune n8 n9 n10 n11 | ||
82 | +operation: prune n7 n8 n11 | ||
83 | + | ||
84 | +tregex: VP=n0 <1 (VBD=n1) <2 (ADVP=n2) <3 (PP=n4 <1 (IN=n5) <2 (NP=n6) <- =n6) <4 (PRN=n9 <1 (-LRB-=n10) <2 (NP=n11) <3 (/,/=n14) <4 (NP=n15) <5 (-RRB-=n21) <- =n21) <- =n9 | ||
85 | +operation: prune n9 | ||
86 | +operation: replace n6 n11, prune n9 | ||
87 | +operation: replace n6 n15, prune n9 | ||
88 | + | ||
89 | +tregex: NP=n0 <1 (NP=n1) <2 (PRN=n5 <1 (-LRB-=n6) <2 (CC|CONJP=n7) <3 (ADVP=n8) <4 (NP=n10) <5 (-RRB-=n13) <- =n13) <- =n5 | ||
90 | +operation: replace n0 n1 | ||
91 | +operation: replace n0 n10 | ||
92 | + | ||
93 | +tregex: NP=n0 <1 (NP=n1) <2 (PRN=n6 <1 (-LRB-=n7) <2 (PP=n8 <1 (FW=n9) <- =n9) <3 (/,/=n10) <4 (NP=n11) <5 (-RRB-=n18) <- =n18) <- =n6 | ||
94 | +operation: replace n0 n1 | ||
95 | +operation: replace n0 n11 | ||
96 | + | ||
97 | +tregex: NP=n0 <1 (NP=n1) <2 (PRN=n4 <1 (-LRB-=n5) <2 (PP=n6 <1 (FW=n7) <2 (NP=n8) <- =n8) <3 (/,/=n10) <4 (NP=n11) <5 (-RRB-=n13) <- =n13) <- =n4 | ||
98 | +operation: replace n0 n1 | ||
99 | +operation: replace n0 n8 | ||
100 | +operation: replace n0 n11 | ||
101 | + | ||
102 | +tregex: NP=n0 <1 (NP=n1) <2 (PRN=n3 <1 (-LRB-=n4) <2 (CC|CONJP=n5) <3 (NP=n6) <4 (-RRB-=n14) <- =n14) <- =n3 | ||
103 | +operation: replace n0 n1 | ||
104 | +operation: replace n0 n6 | ||
105 | + | ||
106 | +tregex: NP=n0 <1 (NP=n1) <2 (PRN=n4 <1 (-LRB-=n5) <2 (NP=n6) <3 (/:/=n8) <4 (NP=n9) <5 (-RRB-=n11) <- =n11) <- =n4 | ||
107 | +operation: replace n0 n1 | ||
108 | +operation: replace n0 n6 | ||
109 | +operation: replace n0 n9 | ||
110 | + | ||
111 | +tregex: NP=n0 <1 (NP=n1) <2 (-LRB-=n7) <3 (CC|CONJP=n8) <4 (NP=n9) <5 (-RRB-=n16) <- =n16 | ||
112 | +operation: replace n0 n1 | ||
113 | +operation: replace n0 n9 | ||
114 | + | ||
115 | +tregex: NN=n1 $+ (-LRB-=n2 $+ (NN|JJ=n3 $+ (NN|CD=n4 $+ (-RRB-=n5)))) | ||
116 | +operation: prune n2 n3 n4 n5 | ||
117 | +operation: prune n1 n2 n5 | ||
118 | + | ||
119 | +tregex: NP=n0 <1 (NP=n1) <2 (-LRB-=n3) <3 (CC|CONJP=n4) <4 (NP=n5) <5 (-RRB-=n7) <- =n7 | ||
120 | +operation: replace n0 n1 | ||
121 | +operation: replace n0 n5 | ||
122 | + | ||
123 | +tregex: NP=n0 <1 (NP=n1) <2 (PRN=n4 <1 (-LRB-=n5) <2 (NP=n6) <3 (PP=n8) <4 (-RRB-=n15) <- =n15) <- =n4 | ||
124 | +operation: replace n0 n1 | ||
125 | +operation: replace n1 n6, replace n4 n8 | ||
126 | + | ||
127 | +tregex: NN=n5 $+ (-LRB-=n6 $+ (NN=n7 $+ (/,/=n8 $+ (NN=n9 $+ -RRB-=n10)))) | ||
128 | +operation: prune n6 n7 n8 n9 n10 | ||
129 | +operation: prune n5 n6 n8 n9 n10 | ||
130 | +operation: prune n5 n6 n7 n8 n10 | ||
131 | + | ||
132 | +tregex: NP=n0 <1 (NP=n1) <2 (PRN=n4 <1 (-LRB-=n5) <2 (PP=n6 <1 (IN=n7 <: /for/) <2 (NP=n8 <1 (NN=n9 < /example/) <- =n9) <- =n8) <3 (/,/=n10) <4 (NP=n11 <1 (NP=n12) <2 (/,/=n14) <3 (NP=n15) <- =n15) <5 (-RRB-=n17) <- =n17) <- =n4 | ||
133 | +operation: replace n0 n1 | ||
134 | +operation: replace n0 n11 | ||
135 | +operation: replace n0 n15 |
isimp_v2/rules/relative clause-adapter.txt
0 → 100644
1 | +// ref is pp | ||
2 | +tregex: (PP <<- (NP=np !< PP)) $+ (SBAR=clause <1 (WHNP|WHPP|WHADVP) <2 (S <1 /^VP/=vp)) | ||
3 | +operation: adjoinF (NP @) np, move clause $- np | ||
4 | + | ||
5 | +tregex: (PP <<- (NP=np !< PP)) $+ (/,/=comma $+ (SBAR=clause <1 (WHNP|WHPP|WHADVP) <2 (S <1 /^VP/=vp))) | ||
6 | +operation: adjoinF (NP @) np, move comma $- np, move clause $- comma |
isimp_v2/rules/relative clause-detect.txt
0 → 100644
1 | +// whnp relative clause | ||
2 | +tregex: /^NP/=p < (/^NP/=ref $+ (/^SBAR/=clause <1 (WHNP) <2 (S <1 /^VP/=vp))) | ||
3 | +tregex: /^NP/=p < (/^NP/=ref $+ (/^SBAR/=clause <1 (WHNP) <2 (S <1 PP|ADVP <2 /^VP/=vp))) | ||
4 | +tregex: /^NP/=p < (/^NP/=ref $+ (/,/ $+ (/^SBAR/=clause <1 (WHNP) <2 (S <1 /^VP/=vp)))) | ||
5 | +tregex: /^NP/=p < (/^NP/=ref $+ (/,/ $+ (/^SBAR/=clause <1 (WHNP) <2 (S <1 PP|ADVP <2 /^VP/=vp)))) | ||
6 | +// only one case | ||
7 | +tregex: /^NP/=p < (/^NP/=ref $+ (/,/ $+ (/^SBAR/=clause <1 (WHNP <<: /which/) <2 (S <1 /^NP/=vp)))) | ||
8 | + | ||
9 | +// wrb relative clause | ||
10 | +tregex: /^NP/=p < (/^NP/=ref $+ (/^SBAR/=clause <1 WHADVP <2 S=s)) | ||
11 | +tregex: /^NP/=p < (/^NP/=ref $+ (/,/ $+ (/^SBAR/=clause <1 WHADVP <2 S=s))) | ||
12 | + | ||
13 | +// whpp relative clause | ||
14 | +tregex: /^NP/=p < (/^NP/=ref $+ (/^SBAR/=clause <1 (WHPP <1 IN=in) <2 (S <1 /^NP/))) | ||
15 | +tregex: /^NP/=p < (/^NP/=ref $+ (/,/ $+ (/^SBAR/=clause <1 (WHPP <1 IN=in) <2 (S <1 /^NP/)))) | ||
16 | + | ||
17 | +// wp$ relative clause | ||
18 | +tregex: /^NP/=p < (/^NP/=ref $+ (/,/ $+ (/^SBAR/=clause <1 /^WP[$]/ <2 S=s))) | ||
19 | + | ||
20 | +// NP of NP | ||
21 | +tregex: /^NP/=p < (/^NP/=ref1 $+ ((/^PP/ <1 IN <2 NP=ref) $+ (/^SBAR/=clause <1 (WHNP=whnp <1 /WP\$/=wp <2 __=np) <2 (S <1 /^VP/=vp)))) | ||
22 | +tregex: /^NP/=p < (/^NP/=ref1 $+ ((/^PP/ <2 NP=ref2) $+ (/,/=comma $+ (/^SBAR/=clause <1 (WHNP=whnp <1 /WP\$/=wp <2 __=np) <2 (S <1 /^VP/=vp))))) | ||
23 | +tregex: /^NP/=p < (/^NP/=ref1 $+ (/,/ $+ (((/^VP/ <2 /^S|NP$/=ref2) $+ (/,/=comma $+ (/^SBAR/=clause <1 (WHNP=whnp <1 /WP\$/=wp <2 __=np) <2 (S <1 /^VP/=vp))))))) | ||
24 | + | ||
25 | +// NP of NP | ||
26 | +tregex: /^NP/=p < (/^NP/=ref1 $+ ((PP <1 IN <2 NP=ref) $+ (/^SBAR/=clause <1 (WHNP <1 WDT) <2 (S <1 /^VP/=vp)))) | ||
27 | +tregex: /^NP/=p < (/^NP/=ref1 $+ ((PP <2 NP=ref) $+ (/,/=comma $+ (/^SBAR/=clause <1 (WHNP <1 WDT) <2 (S <1 /^VP/=vp))))) | ||
28 | +tregex: /^NP/=p < (/^NP/=ref1 $+ (/,/ $+ (((/^VP/ <2 /^S|NP$/=ref) $+ (/,/=comma $+ (/^SBAR/=clause <1 (WHNP <1 WDT) <2 (S <1 /^VP/=vp))))))) | ||
29 | + | ||
30 | +// as demonstrated ... | ||
31 | +#tregex: /^NP/=p < (/^NP/=ref $+ (/^SBAR/=clause <1 (IN << /as/) <2 (S <1 /^VP/=vp))) | ||
32 | + | ||
33 | +// reduced relative clause | ||
34 | +tregex: /^NP/=p <1 /^NP/=ref <2 (/^VP/=clause <1 VBN|VBG) | ||
35 | +tregex: /^NP/=p <1 /^NP/=ref <2 (/^PP/=clause <1 (VBG << /including/)) | ||
36 | +tregex: /^NP/=p <1 @/N.*/=ref <2 /,/ <3 (/^VP/=clause <1 VBN|VBG) | ||
37 | +tregex: __=p < (/^NP/=ref $+ (/^VP/=clause <1 /^VBG/)) | ||
38 | +tregex: __=p < (/^NP/=ref $+ (/,/ $+ (/^S/ <: (/^VP/=clause <1 /^VBG/)))) | ||
39 | +#tregex: /^NP/=p <1 /^NP/=ref <2 VBN|VBG=clause | ||
40 | + | ||
41 | +#tregex: __=p < (/,/=comma $+ (S=s <: (/^VP/=clause <1 (VBG) <2 (/^SBAR/ <2 S=s2)))) | ||
42 | + |
isimp_v2/rules/relative clause.txt
0 → 100644
1 | +// whnp relative clause | ||
2 | +// 16 | ||
3 | +tregex: NP=p < (NP=ref $+ (SBAR <1 (WHNP=whnp <1 /WP\$/=wp <2 __=np) <2 (S=clause))) | ||
4 | +operation: replace p ref | ||
5 | +operation: replace wp ref, relabel whnp NP | ||
6 | + | ||
7 | +// NP of NP | ||
8 | +tregex: NP=p < (NP=ref1 $+ ((PP <1 IN <2 NP=ref2) $+ (SBAR=clause <1 (WHNP=whnp <1 /WP\$/=wp <2 __=np) <2 (S <1 VP=vp)))) | ||
9 | +operation: prune clause | ||
10 | +operation: replace wp ref1, relabel whnp NP | ||
11 | +operation: replace wp ref2, relabel whnp NP | ||
12 | + | ||
13 | +tregex: NP=p < (NP=ref1 $+ ((PP <2 NP=ref2) $+ (/,/=comma $+ (SBAR=clause <1 (WHNP=whnp <1 /WP\$/=wp <2 __=np) <2 (S <1 VP=vp))))) | ||
14 | +operation: prune clause comma | ||
15 | +operation: replace wp ref1, relabel whnp NP | ||
16 | +operation: replace wp ref2, relabel whnp NP | ||
17 | + | ||
18 | +tregex: NP=p < (NP=ref1 $+ (/,/ $+ (((VP <2 /^S|NP$/=ref2) $+ (/,/=comma $+ (SBAR=clause <1 (WHNP=whnp <1 /WP\$/=wp <2 __=np) <2 (S <1 VP=vp))))))) | ||
19 | +operation: prune clause comma | ||
20 | +operation: replace wp ref1, relabel whnp NP | ||
21 | +operation: replace wp ref2, relabel whnp NP | ||
22 | + | ||
23 | +// wdt relative clause | ||
24 | +// 154 | ||
25 | +tregex: NP=p < (NP=ref $+ (SBAR=clause <1 (WHNP <1 WDT) <2 (S <1 VP=vp))) | ||
26 | +// 88 | ||
27 | +tregex: NP=p < (NP=ref $+ (/,/ $+ (SBAR=clause <1 (WHNP <1 WDT) <2 (S <1 VP=vp)))) | ||
28 | +operation: replace p ref | ||
29 | +operation: new ref vp | ||
30 | + | ||
31 | +// NP of NP | ||
32 | +tregex: NP=p < (NP=ref1 $+ ((PP <1 IN <2 NP=ref2) $+ (SBAR=clause <1 (WHNP <1 WDT) <2 (S <1 VP=vp)))) | ||
33 | +operation: prune clause | ||
34 | +operation: new ref1 vp | ||
35 | +operation: new ref2 vp | ||
36 | + | ||
37 | +tregex: NP=p < (NP=ref1 $+ ((PP <2 NP=ref2) $+ (/,/=comma $+ (SBAR=clause <1 (WHNP <1 WDT) <2 (S <1 VP=vp))))) | ||
38 | +operation: prune clause comma | ||
39 | +operation: new ref1 vp | ||
40 | +operation: new ref2 vp | ||
41 | + | ||
42 | +tregex: NP=p < (NP=ref1 $+ (/,/ $+ (((VP <2 /^S|NP$/=ref2) $+ (/,/=comma $+ (SBAR=clause <1 (WHNP <1 WDT) <2 (S <1 VP=vp))))))) | ||
43 | +operation: prune clause comma | ||
44 | +operation: new ref1 vp | ||
45 | +operation: new ref2 vp | ||
46 | + | ||
47 | +// wrb relative clause | ||
48 | +// 4 | ||
49 | +tregex: NP=p < (NP=ref $+ (SBAR=clause <1 WHADVP <2 S=s)) | ||
50 | +// 4 | ||
51 | +tregex: NP=p < (NP=ref $+ (/,/ $+ (SBAR=clause <1 WHADVP <2 S=s))) | ||
52 | +operation: replace p ref | ||
53 | +operation: new s | ||
54 | + | ||
55 | +// as demonstrated ... | |
56 | +// 6 | ||
57 | +tregex: NP=p < (NP=ref $+ (SBAR <1 (IN << /as/) <2 (S <1 VP=vp))) | ||
58 | +operation: replace p ref | ||
59 | + | ||
60 | +// whpp relative clause | ||
61 | +// 18 | ||
62 | +tregex: NP=p < (NP=ref $+ (SBAR <1 (WHPP <1 IN=in) <2 (S=clause <1 NP))) | ||
63 | +operation: replace p ref | ||
64 | +operation: new clause in ref | ||
65 | + | ||
66 | +// reduced relative clause | ||
67 | +// 471 | ||
68 | +tregex: NP=p <1 NP=ref <2 (VP=vp <1 VBN|VBG) | ||
69 | +// 23 | ||
70 | +tregex: NP=p <1 @/N.*/=ref <2 /,/ <3 (VP=vp <1 VBN|VBG) | ||
71 | +// 1 | ||
72 | +tregex: NP=p <1 NP=ref <2 VBN|VBG=vp | ||
73 | +operation: replace p ref | ||
74 | +operation: new ref vp | ||
75 | + | ||
76 | +tregex: __=p < (/,/=comma $+ (S=s <: (VP <1 (VBG) <2 (SBAR <2 S=s2)))) | ||
77 | +operation: prune comma s | ||
78 | +operation: new s2 | ||
79 | + |
isimp_v2/rules/sentenceBeginning.txt
0 → 100644
1 | +// When VBN, Theme | ||
2 | +tregex: S <1 (SBAR=begin <1 WHADVP <2 (S <: (VP=tr))) <2 /,/=comma <3 (__ << NP=arg) | ||
3 | +operation: new arg tr | ||
4 | +operation: prune begin comma | ||
5 | + | ||
6 | +tregex: S <1 (SBAR=begin <1 WHADVP <2 (S <: (VP=tr))) <2 (__ << NP=arg) | ||
7 | +operation: new arg tr | ||
8 | +operation: prune begin | ||
9 | + | ||
10 | +// By VBG, Theme | ||
11 | +tregex: S <1 (PP=begin <2 (S <: (VP=tr))) <2 /,/=comma <3 (NP=arg) | ||
12 | +operation: new arg tr | ||
13 | +operation: prune begin comma | ||
14 | + | ||
15 | +tregex: S <1 (PP=begin <2 (S <: (VP=tr))) <2 (NP=arg) | ||
16 | +operation: new arg tr | ||
17 | +operation: prune begin |
isimp_v2/run_iSimp.sh
0 → 100755
isimp_v2/simplify.sh
0 → 100755
#!/bin/sh
#
# simplify.sh - launch the iSimp console (Java class main.Console).
#
# Usage: simplify.sh [OPTIONS] [INPUT] [OUTPUT]
#   Every argument is forwarded unchanged to main.Console.
#
# Environment:
#   JAVACMD    java launcher to run; takes precedence over JAVA_HOME.
#   JAVA_HOME  when set (and JAVACMD is not), $JAVA_HOME/bin/java is used.
#   Otherwise the first `java` found on PATH is used.

# Directory containing this script. Might be wrong if $0 is a symlink.
TAGGER_HOME=$(dirname -- "$0")
# The "lib/*" wildcard is meant for the JVM, not the shell, so it must stay
# quoted wherever CLASSPATH is expanded.
CLASSPATH="$TAGGER_HOME/lib/*:$TAGGER_HOME/bin"
OUTPUT_DIR="$TAGGER_HOME/bin"
# Not used by this script; kept in case other tooling relies on it.
SRC_DIR="$TAGGER_HOME/src"

# Resolve the java launcher and run main.Console with the given arguments.
# Arguments: forwarded verbatim to main.Console.
# Returns:   exit status of the JVM, or 127 when no java launcher is found.
main() {
  if [ -z "$JAVACMD" ]; then
    if [ -n "$JAVA_HOME" ]; then
      JAVACMD="$JAVA_HOME/bin/java"
    else
      # command -v is the portable replacement for `which`.
      JAVACMD=$(command -v java)
    fi
  fi

  if [ -z "$JAVACMD" ]; then
    echo "simplify.sh: java not found; set JAVACMD or JAVA_HOME" >&2
    return 127
  fi

  # Quote everything so paths and arguments containing spaces or glob
  # characters reach the JVM intact.
  "$JAVACMD" -Xmx1024m -classpath "$CLASSPATH:$OUTPUT_DIR" main.Console "$@"
}

main "$@"
21 | + |
isimp_v2/testcases/foo-simp.txt
0 → 100644
1 | +{ | ||
2 | + "TYPE": "sentence", | ||
3 | + "TEXT": "The B cell-associated surface molecule CD40 functions to regulate B cell responses.", | ||
4 | + "FROM": 0, | ||
5 | + "TO ": 83, | ||
6 | + "POS ": "The_DT B_NN cell-associated_JJ surface_NN molecule_NN CD40_NN functions_VBZ to_TO regulate_VB B_NN cell_NN responses_NNS ._. ", | ||
7 | + "TREE": "(ROOT (S (NP (DT The) (NN B) (JJ cell-associated) (NN surface) (NN molecule) (NN CD40)) (VP (VBZ functions) (S (VP (TO to) (VP (VB regulate) (NP (NN B) (NN cell) (NNS responses)))))) (. .)))", | ||
8 | + "SIMP": [] | ||
9 | +} | ||
10 | +{ | ||
11 | + "TYPE": "sentence", | ||
12 | + "TEXT": "Cross-linking CD40 on B cells can lead to homotypic cell adhesion, IL-6 production, and, in combination with cytokines, to Ig isotype switching.", | ||
13 | + "FROM": 84, | ||
14 | + "TO ": 228, | ||
15 | + "POS ": "Cross-linking_NN CD40_NN on_IN B_NN cells_NNS can_MD lead_VB to_TO homotypic_JJ cell_NN adhesion_NN ,_, IL-6_NN production_NN ,_, and_CC ,_, in_IN combination_NN with_IN cytokines_NNS ,_, to_TO Ig_NN isotype_NN switching_NN ._. ", | ||
16 | + "TREE": "(ROOT (S (NP (NP (NN Cross-linking) (NN CD40)) (PP (IN on) (NP (NN B) (NNS cells)))) (VP (MD can) (VP (VB lead) (PP (PP (TO to) (NP (NP (JJ homotypic) (NN cell) (NN adhesion)) (, ,) (NP (NN IL-6) (NN production)) (, ,))) (CC and) (PRN (, ,) (PP (IN in) (NP (NP (NN combination)) (PP (IN with) (NP (NNS cytokines))))) (, ,)) (PP (TO to) (NP (NN Ig) (NN isotype) (NN switching)))))) (. .)))", | ||
17 | + "SIMP": [] | ||
18 | +} | ||
19 | +{ | ||
20 | + "TYPE": "sentence", | ||
21 | + "TEXT": "Tyrosine kinase activity is increased shortly after engagement of this receptor.", | ||
22 | + "FROM": 229, | ||
23 | + "TO ": 309, | ||
24 | + "POS ": "Tyrosine_NN kinase_NN activity_NN is_VBZ increased_VBN shortly_RB after_IN engagement_NN of_IN this_DT receptor_NN ._. ", | ||
25 | + "TREE": "(ROOT (S (NP (NN Tyrosine) (NN kinase) (NN activity)) (VP (VBZ is) (VP (VBN increased) (ADVP (RB shortly)) (PP (IN after) (NP (NP (NN engagement)) (PP (IN of) (NP (DT this) (NN receptor))))))) (. .)))", | ||
26 | + "SIMP": [] | ||
27 | +} | ||
28 | +{ | ||
29 | + "TYPE": "sentence", | ||
30 | + "TEXT": "Little is known about how the very early events induced by CD40 cross-linking link to cellular responses.", | ||
31 | + "FROM": 310, | ||
32 | + "TO ": 415, | ||
33 | + "POS ": "Little_JJ is_VBZ known_VBN about_IN how_WRB the_DT very_RB early_JJ events_NNS induced_VBN by_IN CD40_NN cross-linking_NN link_NN to_TO cellular_JJ responses_NNS ._. ", | ||
34 | + "TREE": "(ROOT (S (NP (JJ Little)) (VP (VBZ is) (VP (VBN known) (PP (IN about) (SBAR (WHADVP (WRB how)) (S (NP (DT the) (ADJP (RB very) (JJ early)) (NNS events)) (VP (VBN induced) (PP (IN by) (NP (NP (NN CD40) (NN cross-linking) (NN link)) (PP (TO to) (NP (JJ cellular) (NNS responses))))))))))) (. .)))", | ||
35 | + "SIMP": [] | ||
36 | +} | ||
37 | +{ | ||
38 | + "TYPE": "sentence", | ||
39 | + "TEXT": "In this study, we demonstrate that nuclear factor (NF)-kappa B and NF-kappa B-like transcription factors are activated after cross-linking CD40 on resting human tonsillar B cells and on B cell lines.", | ||
40 | + "FROM": 416, | ||
41 | + "TO ": 615, | ||
42 | + "POS ": "In_IN this_DT study_NN ,_, we_PRP demonstrate_VBP that_IN nuclear_JJ factor_NN -LRB-_-LRB- NF_NN -RRB-_-RRB- -_: kappa_NN B_NN and_CC NF-kappa_NN B-like_JJ transcription_NN factors_NNS are_VBP activated_VBN after_IN cross-linking_JJ CD40_NN on_IN resting_VBG human_JJ tonsillar_JJ B_NN cells_NNS and_CC on_IN B_NN cell_NN lines_NNS ._. ", | ||
43 | + "TREE": "(ROOT (S (PP (IN In) (NP (DT this) (NN study))) (, ,) (NP (PRP we)) (VP (VBP demonstrate) (SBAR (IN that) (S (NP (NP (JJ nuclear) (NN factor)) (PRN (-LRB- -LRB-) (NP (NN NF)) (-RRB- -RRB-))) (: -) (NP (NP (NN kappa) (NN B)) (CC and) (NP (NN NF-kappa) (JJ B-like) (NN transcription) (NNS factors))) (VP (VBP are) (VP (VBN activated) (PP (IN after) (NP (JJ cross-linking) (NN CD40))) (PP (PP (IN on) (S (VP (VBG resting) (NP (JJ human) (JJ tonsillar) (NN B) (NNS cells))))) (CC and) (PP (IN on) (NP (NN B) (NN cell) (NNS lines))))))))) (. .)))", | ||
44 | + "SIMP": [ | ||
45 | + { | ||
46 | + "TYPE": "parenthesis", | ||
47 | + "TEXT": "nuclear factor (NF", | ||
48 | + "FROM": 451, | ||
49 | + "TO ": 469, | ||
50 | + "COMP": [ | ||
51 | + { | ||
52 | + "TYPE": "referred noun phrase", | ||
53 | + "FROM": 451, | ||
54 | + "TO ": 465 | ||
55 | + }, | ||
56 | + { | ||
57 | + "TYPE": "parenthesized elements", | ||
58 | + "FROM": 467, | ||
59 | + "TO ": 469 | ||
60 | + } | ||
61 | + ] | ||
62 | + }, | ||
63 | + { | ||
64 | + "TYPE": "noun or noun phrase coordination", | ||
65 | + "TEXT": "kappa B and NF-kappa B-like transcription factors", | ||
66 | + "FROM": 471, | ||
67 | + "TO ": 520, | ||
68 | + "COMP": [ | ||
69 | + { | ||
70 | + "TYPE": "conjunct", | ||
71 | + "FROM": 471, | ||
72 | + "TO ": 478 | ||
73 | + }, | ||
74 | + { | ||
75 | + "TYPE": "conjunction", | ||
76 | + "FROM": 479, | ||
77 | + "TO ": 482 | ||
78 | + }, | ||
79 | + { | ||
80 | + "TYPE": "conjunct", | ||
81 | + "FROM": 483, | ||
82 | + "TO ": 520 | ||
83 | + } | ||
84 | + ] | ||
85 | + }, | ||
86 | + { | ||
87 | + "TYPE": "prep or prep phrase coordination", | ||
88 | + "TEXT": "on resting human tonsillar B cells and on B cell lines", | ||
89 | + "FROM": 560, | ||
90 | + "TO ": 614, | ||
91 | + "COMP": [ | ||
92 | + { | ||
93 | + "TYPE": "conjunct", | ||
94 | + "FROM": 560, | ||
95 | + "TO ": 594 | ||
96 | + }, | ||
97 | + { | ||
98 | + "TYPE": "conjunction", | ||
99 | + "FROM": 595, | ||
100 | + "TO ": 598 | ||
101 | + }, | ||
102 | + { | ||
103 | + "TYPE": "conjunct", | ||
104 | + "FROM": 599, | ||
105 | + "TO ": 614 | ||
106 | + } | ||
107 | + ] | ||
108 | + } | ||
109 | + ] | ||
110 | +} | ||
111 | +{ | ||
112 | + "TYPE": "sentence", | ||
113 | + "TEXT": "The activation is rapid and is mediated through a tyrosine kinase-dependent pathway.", | ||
114 | + "FROM": 616, | ||
115 | + "TO ": 700, | ||
116 | + "POS ": "The_DT activation_NN is_VBZ rapid_JJ and_CC is_VBZ mediated_VBN through_IN a_DT tyrosine_NN kinase-dependent_JJ pathway_NN ._. ", | ||
117 | + "TREE": "(ROOT (S (NP (DT The) (NN activation)) (VP (VP (VBZ is) (ADJP (JJ rapid))) (CC and) (VP (VBZ is) (VP (VBN mediated) (PP (IN through) (NP (DT a) (NN tyrosine) (JJ kinase-dependent) (NN pathway)))))) (. .)))", | ||
118 | + "SIMP": [ | ||
119 | + { | ||
120 | + "TYPE": "verb or verb phrase coordination", | ||
121 | + "TEXT": "is rapid and is mediated through a tyrosine kinase-dependent pathway", | ||
122 | + "FROM": 631, | ||
123 | + "TO ": 699, | ||
124 | + "COMP": [ | ||
125 | + { | ||
126 | + "TYPE": "conjunct", | ||
127 | + "FROM": 631, | ||
128 | + "TO ": 639 | ||
129 | + }, | ||
130 | + { | ||
131 | + "TYPE": "conjunction", | ||
132 | + "FROM": 640, | ||
133 | + "TO ": 643 | ||
134 | + }, | ||
135 | + { | ||
136 | + "TYPE": "conjunct", | ||
137 | + "FROM": 644, | ||
138 | + "TO ": 699 | ||
139 | + } | ||
140 | + ] | ||
141 | + } | ||
142 | + ] | ||
143 | +} | ||
144 | +{ | ||
145 | + "TYPE": "sentence", | ||
146 | + "TEXT": "The complexes detected in electrophoretic mobility shift assays contain p50, p65 (RelA), c-Rel, and most likely other components.", | ||
147 | + "FROM": 701, | ||
148 | + "TO ": 830, | ||
149 | + "POS ": "The_DT complexes_NNS detected_VBN in_IN electrophoretic_JJ mobility_NN shift_NN assays_NNS contain_VBP p50_NN ,_, p65_NN -LRB-_-LRB- RelA_NN -RRB-_-RRB- ,_, c-Rel_NN ,_, and_CC most_RBS likely_JJ other_JJ components_NNS ._. ", | ||
150 | + "TREE": "(ROOT (S (NP (NP (DT The) (NNS complexes)) (VP (VBN detected) (PP (IN in) (NP (JJ electrophoretic) (NN mobility) (NN shift) (NNS assays))))) (VP (VBP contain) (NP (NP (NN p50) (, ,) (NN p65) (PRN (-LRB- -LRB-) (NN RelA) (-RRB- -RRB-))) (, ,) (NP (NN c-Rel)) (, ,) (CC and) (NP (ADJP (RBS most) (JJ likely)) (JJ other) (NNS components)))) (. .)))", | ||
151 | + "SIMP": [ | ||
152 | + { | ||
153 | + "TYPE": "reduced relative clause", | ||
154 | + "TEXT": "The complexes detected in electrophoretic mobility shift assays", | ||
155 | + "FROM": 701, | ||
156 | + "TO ": 764, | ||
157 | + "COMP": [ | ||
158 | + { | ||
159 | + "TYPE": "referred noun phrase", | ||
160 | + "FROM": 701, | ||
161 | + "TO ": 714 | ||
162 | + }, | ||
163 | + { | ||
164 | + "TYPE": "clause", | ||
165 | + "FROM": 715, | ||
166 | + "TO ": 764 | ||
167 | + } | ||
168 | + ] | ||
169 | + }, | ||
170 | + { | ||
171 | + "TYPE": "noun or noun phrase coordination", | ||
172 | + "TEXT": "p50, p65 (RelA), c-Rel, and most likely other components", | ||
173 | + "FROM": 773, | ||
174 | + "TO ": 829, | ||
175 | + "COMP": [ | ||
176 | + { | ||
177 | + "TYPE": "conjunct", | ||
178 | + "FROM": 773, | ||
179 | + "TO ": 788 | ||
180 | + }, | ||
181 | + { | ||
182 | + "TYPE": "conjunct", | ||
183 | + "FROM": 790, | ||
184 | + "TO ": 795 | ||
185 | + }, | ||
186 | + { | ||
187 | + "TYPE": "conjunction", | ||
188 | + "FROM": 797, | ||
189 | + "TO ": 800 | ||
190 | + }, | ||
191 | + { | ||
192 | + "TYPE": "conjunct", | ||
193 | + "FROM": 801, | ||
194 | + "TO ": 829 | ||
195 | + } | ||
196 | + ] | ||
197 | + } | ||
198 | + ] | ||
199 | +} | ||
200 | +{ | ||
201 | + "TYPE": "sentence", | ||
202 | + "TEXT": "By using transient transfection assays, we found that cross-linking CD40 supports NF-kappa B-dependent gene expression.", | ||
203 | + "FROM": 831, | ||
204 | + "TO ": 950, | ||
205 | + "POS ": "By_IN using_VBG transient_JJ transfection_NN assays_NNS ,_, we_PRP found_VBD that_IN cross-linking_JJ CD40_NN supports_VBZ NF-kappa_NN B-dependent_JJ gene_NN expression_NN ._. ", | ||
206 | + "TREE": "(ROOT (S (PP (IN By) (S (VP (VBG using) (NP (JJ transient) (NN transfection) (NNS assays))))) (, ,) (NP (PRP we)) (VP (VBD found) (SBAR (IN that) (S (NP (JJ cross-linking) (NN CD40)) (VP (VBZ supports) (NP (NP (NN NF-kappa)) (NP (JJ B-dependent) (NN gene) (NN expression))))))) (. .)))", | ||
207 | + "SIMP": [] | ||
208 | +} | ||
209 | +{ | ||
210 | + "TYPE": "sentence", | ||
211 | + "TEXT": "Our results define the NF-kappa B system as an intermediate event in CD40 signaling and suggest that the CD40 pathway can influence the expression of B cell-associated genes with NF-kappa B consensus sites.", | ||
212 | + "FROM": 951, | ||
213 | + "TO ": 1157, | ||
214 | + "POS ": "Our_PRP$ results_NNS define_VBP the_DT NF-kappa_NN B_NN system_NN as_IN an_DT intermediate_JJ event_NN in_IN CD40_NN signaling_NN and_CC suggest_VBP that_IN the_DT CD40_NN pathway_NN can_MD influence_VB the_DT expression_NN of_IN B_NN cell-associated_JJ genes_NNS with_IN NF-kappa_NN B_NN consensus_NN sites_NNS ._. ", | ||
215 | + "TREE": "(ROOT (S (NP (PRP$ Our) (NNS results)) (VP (VP (VBP define) (NP (DT the) (NN NF-kappa) (NN B) (NN system)) (PP (IN as) (NP (NP (DT an) (JJ intermediate) (NN event)) (PP (IN in) (NP (NN CD40) (NN signaling)))))) (CC and) (VP (VBP suggest) (SBAR (IN that) (S (NP (DT the) (NN CD40) (NN pathway)) (VP (MD can) (VP (VB influence) (NP (NP (DT the) (NN expression)) (PP (IN of) (NP (NN B) (JJ cell-associated) (NNS genes)))) (PP (IN with) (NP (NN NF-kappa) (NN B) (NN consensus) (NNS sites))))))))) (. .)))", | ||
216 | + "SIMP": [ | ||
217 | + { | ||
218 | + "TYPE": "verb or verb phrase coordination", | ||
219 | + "TEXT": "define the NF-kappa B system as an intermediate event in CD40 signaling and suggest that the CD40 pathway can influence the expression of B cell-associated genes with NF-kappa B consensus sites", | ||
220 | + "FROM": 963, | ||
221 | + "TO ": 1156, | ||
222 | + "COMP": [ | ||
223 | + { | ||
224 | + "TYPE": "conjunct", | ||
225 | + "FROM": 963, | ||
226 | + "TO ": 1034 | ||
227 | + }, | ||
228 | + { | ||
229 | + "TYPE": "conjunction", | ||
230 | + "FROM": 1035, | ||
231 | + "TO ": 1038 | ||
232 | + }, | ||
233 | + { | ||
234 | + "TYPE": "conjunct", | ||
235 | + "FROM": 1039, | ||
236 | + "TO ": 1156 | ||
237 | + } | ||
238 | + ] | ||
239 | + } | ||
240 | + ] | ||
241 | +} |
isimp_v2/testcases/foo.json
0 → 100644
1 | +{ | ||
2 | + "TYPE": "sentence", | ||
3 | + "TEXT": "The B cell-associated surface molecule CD40 functions to regulate B cell responses.", | ||
4 | + "FROM": 0, | ||
5 | + "TO ": 83, | ||
6 | + "POS ": "The_DT B_NN cell-associated_JJ surface_NN molecule_NN CD40_NN functions_VBZ to_TO regulate_VB B_NN cell_NN responses_NNS ._. ", | ||
7 | + "TREE": "(ROOT (S (NP (DT The) (NN B) (JJ cell-associated) (NN surface) (NN molecule) (NN CD40)) (VP (VBZ functions) (S (VP (TO to) (VP (VB regulate) (NP (NN B) (NN cell) (NNS responses)))))) (. .)))", | ||
8 | + "SIMP": [] | ||
9 | +} | ||
10 | +{ | ||
11 | + "TYPE": "sentence", | ||
12 | + "TEXT": "Cross-linking CD40 on B cells can lead to homotypic cell adhesion, IL-6 production, and, in combination with cytokines, to Ig isotype switching.", | ||
13 | + "FROM": 84, | ||
14 | + "TO ": 228, | ||
15 | + "POS ": "Cross-linking_NN CD40_NN on_IN B_NN cells_NNS can_MD lead_VB to_TO homotypic_JJ cell_NN adhesion_NN ,_, IL-6_NN production_NN ,_, and_CC ,_, in_IN combination_NN with_IN cytokines_NNS ,_, to_TO Ig_NN isotype_NN switching_NN ._. ", | ||
16 | + "TREE": "(ROOT (S (NP (NP (NN Cross-linking) (NN CD40)) (PP (IN on) (NP (NN B) (NNS cells)))) (VP (MD can) (VP (VB lead) (PP (PP (TO to) (NP (NP (JJ homotypic) (NN cell) (NN adhesion)) (, ,) (NP (NN IL-6) (NN production)) (, ,))) (CC and) (PRN (, ,) (PP (IN in) (NP (NP (NN combination)) (PP (IN with) (NP (NNS cytokines))))) (, ,)) (PP (TO to) (NP (NN Ig) (NN isotype) (NN switching)))))) (. .)))", | ||
17 | + "SIMP": [] | ||
18 | +} | ||
19 | +{ | ||
20 | + "TYPE": "sentence", | ||
21 | + "TEXT": "Tyrosine kinase activity is increased shortly after engagement of this receptor.", | ||
22 | + "FROM": 229, | ||
23 | + "TO ": 309, | ||
24 | + "POS ": "Tyrosine_NN kinase_NN activity_NN is_VBZ increased_VBN shortly_RB after_IN engagement_NN of_IN this_DT receptor_NN ._. ", | ||
25 | + "TREE": "(ROOT (S (NP (NN Tyrosine) (NN kinase) (NN activity)) (VP (VBZ is) (VP (VBN increased) (ADVP (RB shortly)) (PP (IN after) (NP (NP (NN engagement)) (PP (IN of) (NP (DT this) (NN receptor))))))) (. .)))", | ||
26 | + "SIMP": [] | ||
27 | +} | ||
28 | +{ | ||
29 | + "TYPE": "sentence", | ||
30 | + "TEXT": "Little is known about how the very early events induced by CD40 cross-linking link to cellular responses.", | ||
31 | + "FROM": 310, | ||
32 | + "TO ": 415, | ||
33 | + "POS ": "Little_JJ is_VBZ known_VBN about_IN how_WRB the_DT very_RB early_JJ events_NNS induced_VBN by_IN CD40_NN cross-linking_NN link_NN to_TO cellular_JJ responses_NNS ._. ", | ||
34 | + "TREE": "(ROOT (S (NP (JJ Little)) (VP (VBZ is) (VP (VBN known) (PP (IN about) (SBAR (WHADVP (WRB how)) (S (NP (DT the) (ADJP (RB very) (JJ early)) (NNS events)) (VP (VBN induced) (PP (IN by) (NP (NP (NN CD40) (NN cross-linking) (NN link)) (PP (TO to) (NP (JJ cellular) (NNS responses))))))))))) (. .)))", | ||
35 | + "SIMP": [] | ||
36 | +} | ||
37 | +{ | ||
38 | + "TYPE": "sentence", | ||
39 | + "TEXT": "In this study, we demonstrate that nuclear factor (NF)-kappa B and NF-kappa B-like transcription factors are activated after cross-linking CD40 on resting human tonsillar B cells and on B cell lines.", | ||
40 | + "FROM": 416, | ||
41 | + "TO ": 615, | ||
42 | + "POS ": "In_IN this_DT study_NN ,_, we_PRP demonstrate_VBP that_IN nuclear_JJ factor_NN -LRB-_-LRB- NF_NN -RRB-_-RRB- -_: kappa_NN B_NN and_CC NF-kappa_NN B-like_JJ transcription_NN factors_NNS are_VBP activated_VBN after_IN cross-linking_JJ CD40_NN on_IN resting_VBG human_JJ tonsillar_JJ B_NN cells_NNS and_CC on_IN B_NN cell_NN lines_NNS ._. ", | ||
43 | + "TREE": "(ROOT (S (PP (IN In) (NP (DT this) (NN study))) (, ,) (NP (PRP we)) (VP (VBP demonstrate) (SBAR (IN that) (S (NP (NP (JJ nuclear) (NN factor)) (PRN (-LRB- -LRB-) (NP (NN NF)) (-RRB- -RRB-))) (: -) (NP (NP (NN kappa) (NN B)) (CC and) (NP (NN NF-kappa) (JJ B-like) (NN transcription) (NNS factors))) (VP (VBP are) (VP (VBN activated) (PP (IN after) (NP (JJ cross-linking) (NN CD40))) (PP (PP (IN on) (S (VP (VBG resting) (NP (JJ human) (JJ tonsillar) (NN B) (NNS cells))))) (CC and) (PP (IN on) (NP (NN B) (NN cell) (NNS lines))))))))) (. .)))", | ||
44 | + "SIMP": [ | ||
45 | + { | ||
46 | + "TYPE": "parenthesis", | ||
47 | + "TEXT": "nuclear factor (NF", | ||
48 | + "FROM": 451, | ||
49 | + "TO ": 469, | ||
50 | + "COMP": [ | ||
51 | + { | ||
52 | + "TYPE": "referred noun phrase", | ||
53 | + "FROM": 451, | ||
54 | + "TO ": 465 | ||
55 | + }, | ||
56 | + { | ||
57 | + "TYPE": "parenthesized elements", | ||
58 | + "FROM": 467, | ||
59 | + "TO ": 469 | ||
60 | + } | ||
61 | + ] | ||
62 | + }, | ||
63 | + { | ||
64 | + "TYPE": "noun or noun phrase coordination", | ||
65 | + "TEXT": "kappa B and NF-kappa B-like transcription factors", | ||
66 | + "FROM": 471, | ||
67 | + "TO ": 520, | ||
68 | + "COMP": [ | ||
69 | + { | ||
70 | + "TYPE": "conjunct", | ||
71 | + "FROM": 471, | ||
72 | + "TO ": 478 | ||
73 | + }, | ||
74 | + { | ||
75 | + "TYPE": "conjunction", | ||
76 | + "FROM": 479, | ||
77 | + "TO ": 482 | ||
78 | + }, | ||
79 | + { | ||
80 | + "TYPE": "conjunct", | ||
81 | + "FROM": 483, | ||
82 | + "TO ": 520 | ||
83 | + } | ||
84 | + ] | ||
85 | + }, | ||
86 | + { | ||
87 | + "TYPE": "prep or prep phrase coordination", | ||
88 | + "TEXT": "on resting human tonsillar B cells and on B cell lines", | ||
89 | + "FROM": 560, | ||
90 | + "TO ": 614, | ||
91 | + "COMP": [ | ||
92 | + { | ||
93 | + "TYPE": "conjunct", | ||
94 | + "FROM": 560, | ||
95 | + "TO ": 594 | ||
96 | + }, | ||
97 | + { | ||
98 | + "TYPE": "conjunction", | ||
99 | + "FROM": 595, | ||
100 | + "TO ": 598 | ||
101 | + }, | ||
102 | + { | ||
103 | + "TYPE": "conjunct", | ||
104 | + "FROM": 599, | ||
105 | + "TO ": 614 | ||
106 | + } | ||
107 | + ] | ||
108 | + } | ||
109 | + ] | ||
110 | +} | ||
111 | +{ | ||
112 | + "TYPE": "sentence", | ||
113 | + "TEXT": "The activation is rapid and is mediated through a tyrosine kinase-dependent pathway.", | ||
114 | + "FROM": 616, | ||
115 | + "TO ": 700, | ||
116 | + "POS ": "The_DT activation_NN is_VBZ rapid_JJ and_CC is_VBZ mediated_VBN through_IN a_DT tyrosine_NN kinase-dependent_JJ pathway_NN ._. ", | ||
117 | + "TREE": "(ROOT (S (NP (DT The) (NN activation)) (VP (VP (VBZ is) (ADJP (JJ rapid))) (CC and) (VP (VBZ is) (VP (VBN mediated) (PP (IN through) (NP (DT a) (NN tyrosine) (JJ kinase-dependent) (NN pathway)))))) (. .)))", | ||
118 | + "SIMP": [ | ||
119 | + { | ||
120 | + "TYPE": "verb or verb phrase coordination", | ||
121 | + "TEXT": "is rapid and is mediated through a tyrosine kinase-dependent pathway", | ||
122 | + "FROM": 631, | ||
123 | + "TO ": 699, | ||
124 | + "COMP": [ | ||
125 | + { | ||
126 | + "TYPE": "conjunct", | ||
127 | + "FROM": 631, | ||
128 | + "TO ": 639 | ||
129 | + }, | ||
130 | + { | ||
131 | + "TYPE": "conjunction", | ||
132 | + "FROM": 640, | ||
133 | + "TO ": 643 | ||
134 | + }, | ||
135 | + { | ||
136 | + "TYPE": "conjunct", | ||
137 | + "FROM": 644, | ||
138 | + "TO ": 699 | ||
139 | + } | ||
140 | + ] | ||
141 | + } | ||
142 | + ] | ||
143 | +} | ||
144 | +{ | ||
145 | + "TYPE": "sentence", | ||
146 | + "TEXT": "The complexes detected in electrophoretic mobility shift assays contain p50, p65 (RelA), c-Rel, and most likely other components.", | ||
147 | + "FROM": 701, | ||
148 | + "TO ": 830, | ||
149 | + "POS ": "The_DT complexes_NNS detected_VBN in_IN electrophoretic_JJ mobility_NN shift_NN assays_NNS contain_VBP p50_NN ,_, p65_NN -LRB-_-LRB- RelA_NN -RRB-_-RRB- ,_, c-Rel_NN ,_, and_CC most_RBS likely_JJ other_JJ components_NNS ._. ", | ||
150 | + "TREE": "(ROOT (S (NP (NP (DT The) (NNS complexes)) (VP (VBN detected) (PP (IN in) (NP (JJ electrophoretic) (NN mobility) (NN shift) (NNS assays))))) (VP (VBP contain) (NP (NP (NN p50) (, ,) (NN p65) (PRN (-LRB- -LRB-) (NN RelA) (-RRB- -RRB-))) (, ,) (NP (NN c-Rel)) (, ,) (CC and) (NP (ADJP (RBS most) (JJ likely)) (JJ other) (NNS components)))) (. .)))", | ||
151 | + "SIMP": [ | ||
152 | + { | ||
153 | + "TYPE": "reduced relative clause", | ||
154 | + "TEXT": "The complexes detected in electrophoretic mobility shift assays", | ||
155 | + "FROM": 701, | ||
156 | + "TO ": 764, | ||
157 | + "COMP": [ | ||
158 | + { | ||
159 | + "TYPE": "referred noun phrase", | ||
160 | + "FROM": 701, | ||
161 | + "TO ": 714 | ||
162 | + }, | ||
163 | + { | ||
164 | + "TYPE": "clause", | ||
165 | + "FROM": 715, | ||
166 | + "TO ": 764 | ||
167 | + } | ||
168 | + ] | ||
169 | + }, | ||
170 | + { | ||
171 | + "TYPE": "noun or noun phrase coordination", | ||
172 | + "TEXT": "p50, p65 (RelA), c-Rel, and most likely other components", | ||
173 | + "FROM": 773, | ||
174 | + "TO ": 829, | ||
175 | + "COMP": [ | ||
176 | + { | ||
177 | + "TYPE": "conjunct", | ||
178 | + "FROM": 773, | ||
179 | + "TO ": 788 | ||
180 | + }, | ||
181 | + { | ||
182 | + "TYPE": "conjunct", | ||
183 | + "FROM": 790, | ||
184 | + "TO ": 795 | ||
185 | + }, | ||
186 | + { | ||
187 | + "TYPE": "conjunction", | ||
188 | + "FROM": 797, | ||
189 | + "TO ": 800 | ||
190 | + }, | ||
191 | + { | ||
192 | + "TYPE": "conjunct", | ||
193 | + "FROM": 801, | ||
194 | + "TO ": 829 | ||
195 | + } | ||
196 | + ] | ||
197 | + } | ||
198 | + ] | ||
199 | +} | ||
200 | +{ | ||
201 | + "TYPE": "sentence", | ||
202 | + "TEXT": "By using transient transfection assays, we found that cross-linking CD40 supports NF-kappa B-dependent gene expression.", | ||
203 | + "FROM": 831, | ||
204 | + "TO ": 950, | ||
205 | + "POS ": "By_IN using_VBG transient_JJ transfection_NN assays_NNS ,_, we_PRP found_VBD that_IN cross-linking_JJ CD40_NN supports_VBZ NF-kappa_NN B-dependent_JJ gene_NN expression_NN ._. ", | ||
206 | + "TREE": "(ROOT (S (PP (IN By) (S (VP (VBG using) (NP (JJ transient) (NN transfection) (NNS assays))))) (, ,) (NP (PRP we)) (VP (VBD found) (SBAR (IN that) (S (NP (JJ cross-linking) (NN CD40)) (VP (VBZ supports) (NP (NP (NN NF-kappa)) (NP (JJ B-dependent) (NN gene) (NN expression))))))) (. .)))", | ||
207 | + "SIMP": [] | ||
208 | +} | ||
209 | +{ | ||
210 | + "TYPE": "sentence", | ||
211 | + "TEXT": "Our results define the NF-kappa B system as an intermediate event in CD40 signaling and suggest that the CD40 pathway can influence the expression of B cell-associated genes with NF-kappa B consensus sites.", | ||
212 | + "FROM": 951, | ||
213 | + "TO ": 1157, | ||
214 | + "POS ": "Our_PRP$ results_NNS define_VBP the_DT NF-kappa_NN B_NN system_NN as_IN an_DT intermediate_JJ event_NN in_IN CD40_NN signaling_NN and_CC suggest_VBP that_IN the_DT CD40_NN pathway_NN can_MD influence_VB the_DT expression_NN of_IN B_NN cell-associated_JJ genes_NNS with_IN NF-kappa_NN B_NN consensus_NN sites_NNS ._. ", | ||
215 | + "TREE": "(ROOT (S (NP (PRP$ Our) (NNS results)) (VP (VP (VBP define) (NP (DT the) (NN NF-kappa) (NN B) (NN system)) (PP (IN as) (NP (NP (DT an) (JJ intermediate) (NN event)) (PP (IN in) (NP (NN CD40) (NN signaling)))))) (CC and) (VP (VBP suggest) (SBAR (IN that) (S (NP (DT the) (NN CD40) (NN pathway)) (VP (MD can) (VP (VB influence) (NP (NP (DT the) (NN expression)) (PP (IN of) (NP (NN B) (JJ cell-associated) (NNS genes)))) (PP (IN with) (NP (NN NF-kappa) (NN B) (NN consensus) (NNS sites))))))))) (. .)))", | ||
216 | + "SIMP": [ | ||
217 | + { | ||
218 | + "TYPE": "verb or verb phrase coordination", | ||
219 | + "TEXT": "define the NF-kappa B system as an intermediate event in CD40 signaling and suggest that the CD40 pathway can influence the expression of B cell-associated genes with NF-kappa B consensus sites", | ||
220 | + "FROM": 963, | ||
221 | + "TO ": 1156, | ||
222 | + "COMP": [ | ||
223 | + { | ||
224 | + "TYPE": "conjunct", | ||
225 | + "FROM": 963, | ||
226 | + "TO ": 1034 | ||
227 | + }, | ||
228 | + { | ||
229 | + "TYPE": "conjunction", | ||
230 | + "FROM": 1035, | ||
231 | + "TO ": 1038 | ||
232 | + }, | ||
233 | + { | ||
234 | + "TYPE": "conjunct", | ||
235 | + "FROM": 1039, | ||
236 | + "TO ": 1156 | ||
237 | + } | ||
238 | + ] | ||
239 | + } | ||
240 | + ] | ||
241 | +} |
isimp_v2/testcases/foo.txt
0 → 100644
1 | +The B cell-associated surface molecule CD40 functions to regulate B cell responses. Cross-linking CD40 on B cells can lead to homotypic cell adhesion, IL-6 production, and, in combination with cytokines, to Ig isotype switching. Tyrosine kinase activity is increased shortly after engagement of this receptor. Little is known about how the very early events induced by CD40 cross-linking link to cellular responses. In this study, we demonstrate that nuclear factor (NF)-kappa B and NF-kappa B-like transcription factors are activated after cross-linking CD40 on resting human tonsillar B cells and on B cell lines. The activation is rapid and is mediated through a tyrosine kinase-dependent pathway. The complexes detected in electrophoretic mobility shift assays contain p50, p65 (RelA), c-Rel, and most likely other components. By using transient transfection assays, we found that cross-linking CD40 supports NF-kappa B-dependent gene expression. Our results define the NF-kappa B system as an intermediate event in CD40 signaling and suggest that the CD40 pathway can influence the expression of B cell-associated genes with NF-kappa B consensus sites. | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
sentence-simplification-main.sh
0 → 100755
#!/bin/bash
#
# sentence-simplification-main.sh
#
# Two-stage pipeline:
#   1. run every file found in <input_path> through isimp_v2/simplify.sh,
#      writing the results to ./iSimp_sentences/ (next to this script);
#   2. run simplifier.py (python2) on every iSimp result, writing to
#      ./algorithm_sentences/ and passing <output_file_path> as the index file.
#
# Usage: ./sentence-simplification-main.sh <input_path> <output_file_path>
#
# NOTE(review): the script changes into its own directory before the paths
# are used, so relative <input_path>/<output_file_path> values are resolved
# against the script directory, not the caller's working directory.

# Validate arguments
if [[ $# -ne 2 ]]; then
  echo 'Usage: ./sentence-simplification-main.sh <input_path> <output_file_path>' >&2
  exit 1
fi

SCRIPT_PATH=$(cd "$(dirname -- "$0")" && pwd) || exit 1
# Directory holding the sentence files to simplify.
INPUT_PATH=$1
OUTPUT_INDEX_FILE_PATH=$2
cd "$SCRIPT_PATH" || exit 1

# Analyse with iSimp
echo "Analysing in iSimp..."
# Clear results of a previous run; -f keeps this quiet when the directory is
# already empty (the glob then stays a literal, non-existent name).
rm -f -- ./iSimp_sentences/*
for j in "$INPUT_PATH"/*; do
  # Skip the literal pattern left behind when the glob matches nothing.
  [[ -e "$j" ]] || continue
  printf '%s\n' "$j"
  "$SCRIPT_PATH/isimp_v2/simplify.sh" "$j" \
    "$SCRIPT_PATH/iSimp_sentences/$(basename -- "$j")"
done
cd "$SCRIPT_PATH" || exit 1

# Create (truncate) the index of simplified files; >| overrides noclobber.
: >| "$OUTPUT_INDEX_FILE_PATH"

# Feed the iSimp output to the simplification algorithm
echo "Analysing in Algorithm..."
rm -f -- ./algorithm_sentences/*
for k in "$SCRIPT_PATH"/iSimp_sentences/*; do
  [[ -e "$k" ]] || continue
  printf '%s\n' "$k"
  python2 "$SCRIPT_PATH/simplifier.py" "$k" \
    "$SCRIPT_PATH/algorithm_sentences/$(basename -- "$k")" \
    "$OUTPUT_INDEX_FILE_PATH"
done
# Kept as the final command so the script's exit status matches the original
# (it does not reflect the last python2 invocation).
cd "$SCRIPT_PATH"
simplifier.py
0 → 100644
1 | +import copy | ||
2 | +import sys | ||
3 | +import requests | ||
4 | + | ||
class Simp(object):
    """One simplification construct: a type label, a text span and its components."""

    def __init__(self):
        # Text-valued fields start empty, numeric span bounds start at zero.
        self.TYPE = self.TEXT = ""
        self.TYPEx = self.TYPEy = 0
        # Component descriptions, appended one at a time via agregarCOMP.
        self.COMP = list()

    def agregarCOMP(self, comp):
        """Append one component entry to COMP."""
        self.COMP.append(comp)

    def agregarTEXT(self, text):
        """Store the construct text."""
        self.TEXT = text

    def agregarTYPE(self, Type):
        """Store the construct type label."""
        self.TYPE = Type
18 | + | ||
class Frase(object):
    """Parsed input record: type, text, POS string, tree and its Simp entries."""

    def __init__(self):
        # All string fields start out empty.
        self.TYPE = self.TEXT = self.POS = self.TREE = ""
        # Simp objects, created empty by agregarSIMP and filled in afterwards.
        self.SIMP = list()

    def agregarSIMP(self):
        """Append a fresh, empty Simp to SIMP."""
        self.SIMP.append(Simp())

    def agregarTREE(self, Tree):
        """Store the tree string."""
        self.TREE = Tree

    def agregarPOS(self, Pos):
        """Store the POS string."""
        self.POS = Pos

    def agregarTEXT(self, text):
        """Store the text."""
        self.TEXT = text

    def agregarTYPE(self, Type):
        """Store the type label."""
        self.TYPE = Type
36 | + | ||
class Sentence(object):
    """Working sentence: a FLAG marker, its text, its tree and its Simp entries."""

    def __init__(self):
        # FLAG starts as True; the string fields start empty.
        self.FLAG = True
        self.TEXT = self.TREE = ""
        # Simp objects attached to this sentence.
        self.SIMP = list()

    def agregarSIMP(self):
        """Append a fresh, empty Simp to SIMP."""
        self.SIMP.append(Simp())

    def agregarTREE(self, Tree):
        """Store the tree string."""
        self.TREE = Tree

    def agregarTEXT(self, text):
        """Store the sentence text."""
        self.TEXT = text
49 | + | ||
50 | + | ||
# Module-level sentence lists (MEMORIAA is not referenced in the visible code).
MEMORIAB=[]
MEMORIAA=[]


#---- read the input data from the file named by the first argument
arch=(sys.argv[1])
# `with` guarantees the file is closed; the previous `f.close` lacked the
# call parentheses and therefore never closed the handle.
with open(arch) as f:
    dato = f.read().splitlines()
frase=Frase()
for linea in dato:
    # Top-level fields are recognised by their 6-character prefix.
    if 'TYPE: ' in linea[0:6]:
        frase.agregarTYPE(linea[6:])
    elif 'TEXT: ' in linea[0:6]:
        frase.agregarTEXT(linea[6:])
    elif 'POS : ' in linea[0:6]:
        frase.agregarPOS(linea[6:])
    elif 'TREE: ' in linea[0:6]:
        frase.agregarTREE(linea[6:])
    elif 'SIMP:' in linea:
        # Start of a new simplification construct.
        frase.agregarSIMP()
    # Fields of the most recently started SIMP; the value starts at column 8.
    # NOTE(review): the 8-character slice suggests these markers carry two
    # leading spaces in the real input - confirm against iSimp output.
    elif ' TYPE: ' in linea[0:8]:
        frase.SIMP[-1].agregarTYPE(linea[8:])
    elif ' TEXT: ' in linea[0:8]:
        frase.SIMP[-1].agregarTEXT(linea[8:])
    elif ' COMP: ' in linea:
        frase.SIMP[-1].agregarCOMP(linea[8:])
#------------
79 | + | ||
80 | + | ||
81 | +#-------Programa principal | ||
82 | +#Algoritmo v4 | ||
83 | + | ||
84 | + | ||
85 | +if ((frase.TYPE.find('sentence')) !=- 1) and (frase.SIMP!=[]) and (frase.SIMP[0].TYPE != ''): | ||
86 | + y=1 | ||
87 | + w=1 | ||
88 | + SIMPworkspace=[] | ||
89 | + # copia TREE y cada SIMP a SENTENCE.1 | ||
90 | + Sentence1=Sentence() | ||
91 | + Sentence1.TREE=copy.deepcopy(frase.TREE) | ||
92 | + Sentence1.TEXT=copy.deepcopy(frase.TEXT) | ||
93 | + for i in range(len(frase.SIMP)): | ||
94 | + #Sentence1.SIMP.append(Simp()) | ||
95 | + #Sentence1.SIMP[i]=copy.deepcopy(frase.SIMP[i]) | ||
96 | + SIMPworkspace.append(Simp()) | ||
97 | + SIMPworkspace[i]=copy.deepcopy(frase.SIMP[i]) | ||
98 | + | ||
99 | +## ORDENAMIENTO DE SIMPs | ||
100 | + for i in range(len(SIMPworkspace)): | ||
101 | + #print SIMPworkspace[i].TEXT | ||
102 | + #print SIMPworkspace[i].TYPE | ||
103 | + SIMPworkspace[i].TYPEx = int(SIMPworkspace[i].TYPE[SIMPworkspace[i].TYPE.find('[')+1:SIMPworkspace[i].TYPE.find('..')]) | ||
104 | + SIMPworkspace[i].TYPEy = int(SIMPworkspace[i].TYPE[SIMPworkspace[i].TYPE.find('..')+2:SIMPworkspace[i].TYPE.find(']')]) | ||
105 | + if 'parenthesis' in SIMPworkspace[i].TYPE: | ||
106 | + SIMPworkspace[i].TYPEy = SIMPworkspace[i].TYPEy + 2 | ||
107 | + #print SIMPworkspace[i].TYPEx | ||
108 | + #print SIMPworkspace[i].TYPEy | ||
109 | + | ||
110 | + | ||
111 | + SIMPworkspace.sort(key=lambda x: x.TYPEy, reverse=True) | ||
112 | + SIMPworkspace.sort(key=lambda x: x.TYPEx) | ||
113 | + | ||
114 | + | ||
115 | + # for i in range(len(SIMPworkspace)): | ||
116 | + # print "\nSIMP " + str(i) + " :" | ||
117 | + # print SIMPworkspace[i].TYPE | ||
118 | + # print SIMPworkspace[i].TYPEx | ||
119 | + # print SIMPworkspace[i].TYPEy | ||
120 | + # print "\n" | ||
121 | + | ||
122 | + for i in range(len(SIMPworkspace)): | ||
123 | + Sentence1.SIMP.append(Simp()) | ||
124 | + Sentence1.SIMP[i]=copy.deepcopy(SIMPworkspace[i]) | ||
125 | + | ||
126 | + | ||
127 | + # Agrega la oracion original Sentence1 a la memoria como primer objeto en ser analizado | ||
128 | + MEMORIAB.append(Sentence()) | ||
129 | + MEMORIAB[0]=copy.deepcopy(Sentence1) | ||
130 | + | ||
131 | + | ||
132 | + | ||
133 | + # 1 entrada al bucle A por cada SIMP diferente en Sentence1 | ||
134 | + numSimp=len(Sentence1.SIMP) | ||
135 | + s = 0 | ||
136 | + #bucle A | ||
137 | + while s < numSimp : | ||
138 | + #print "\nEntro por vez " + str(s) + " al bucle A" | ||
139 | + #print "Analizando todos los SIMP de tipo: " + MEMORIAB[0].SIMP[s].TYPE | ||
140 | + #Entra al bucle B el numero de veces igual al numerode elementos en MEMORIAB | ||
141 | + numMEM = len(MEMORIAB) | ||
142 | + t = 0 | ||
143 | + #bucle B | ||
144 | + while t < numMEM : | ||
145 | + #print "Entro por vez " + str(t) + " al bucle B" | ||
146 | + #Entra si la oracion no ha sido analizada antes (FLAG==True) y si el texto del simp esta presente en la oracion. | ||
147 | + #print "CONDICIONES:" | ||
148 | + #print "SIMP " + MEMORIAB[0].SIMP[s].TEXT | ||
149 | + #print "SIMP " + MEMORIAB[0].SIMP[s].TYPE | ||
150 | + #print "MEMB " + str(MEMORIAB[t].FLAG) | ||
151 | + #print "MEMB " + MEMORIAB[t].TEXT | ||
152 | + if ( MEMORIAB[0].SIMP[s].TEXT in MEMORIAB[t].TEXT ) and ( MEMORIAB[t].FLAG == True ): | ||
153 | + MEMORIAB[t].FLAG = False | ||
154 | + #print "False to: " + MEMORIAB[t].TEXT | ||
155 | + #print "Entro a condicional" | ||
156 | + #Reglas de simplificacion | ||
157 | + if ( 'coordination' in MEMORIAB[t].SIMP[s].TYPE ) and ( not ('sentence coordination' in MEMORIAB[t].SIMP[s].TYPE ) ) : | ||
158 | + #print "Aplico regla coord" | ||
159 | + TEMPORALES = [] | ||
160 | + c = len(MEMORIAB[t].SIMP[s].COMP) | ||
161 | + #print "Hay " + str(c) + " COMP en este SIMP" | ||
162 | + tt = 0 | ||
163 | + while c > 0 : | ||
164 | + c = c - 1 | ||
165 | + if ( 'conjunct' in MEMORIAB[0].SIMP[s].COMP[c] ) and ( not ( 'conjunction' in MEMORIAB[0].SIMP[s].COMP[c] ) ) : | ||
166 | + TEMPORALES.append(Sentence()) | ||
167 | + TEMPORALES[tt] = copy.deepcopy(MEMORIAB[t]) | ||
168 | + replaced = MEMORIAB[0].SIMP[s].TEXT | ||
169 | + indice1 = (int)(TEMPORALES[tt].SIMP[s].COMP[c][TEMPORALES[tt].SIMP[s].COMP[c].find('[')+1:TEMPORALES[tt].SIMP[s].COMP[c].find('..')]) | ||
170 | + indice2 = (int)(TEMPORALES[tt].SIMP[s].COMP[c][TEMPORALES[tt].SIMP[s].COMP[c].find('..')+2:TEMPORALES[tt].SIMP[s].COMP[c].find(']')]) | ||
171 | + replacer = MEMORIAB[0].TEXT[indice1:indice2] | ||
172 | + TEMPORALES[tt].TEXT = TEMPORALES[tt].TEXT.replace(replaced,replacer) | ||
173 | + tt = tt + 1 | ||
174 | + #copiar simplificaciones de memoria temporal a MEMORIAB | ||
175 | + indtempamem = 0 | ||
176 | + while indtempamem < len(TEMPORALES) : | ||
177 | + MEMORIAB.append(Sentence()) | ||
178 | + MEMORIAB[-1]=copy.deepcopy(TEMPORALES[indtempamem]) | ||
179 | + MEMORIAB[-1].FLAG = True | ||
180 | + #print MEMORIAB[-1].TEXT | ||
181 | + indtempamem = indtempamem + 1 | ||
182 | + elif 'parenthesis' in MEMORIAB[t].SIMP[s].TYPE: | ||
183 | + #print "Aplico regla par" | ||
184 | + TEMPORALES = [] | ||
185 | + c = len(MEMORIAB[t].SIMP[s].COMP) | ||
186 | + #print "Hay " + str(c) + " COMP en este SIMP" | ||
187 | + tt = 0 | ||
188 | + while c > 0 : | ||
189 | + #print "entro al while de par" | ||
190 | + c = c - 1 | ||
191 | + TEMPORALES.append(Sentence()) | ||
192 | + TEMPORALES[tt] = copy.deepcopy(MEMORIAB[t]) | ||
193 | + replaced = MEMORIAB[0].SIMP[s].TEXT + ' )' | ||
194 | + indice1 = (int)(TEMPORALES[tt].SIMP[s].COMP[c][TEMPORALES[tt].SIMP[s].COMP[c].find('[')+1:TEMPORALES[tt].SIMP[s].COMP[c].find('..')]) | ||
195 | + indice2 = (int)(TEMPORALES[tt].SIMP[s].COMP[c][TEMPORALES[tt].SIMP[s].COMP[c].find('..')+2:TEMPORALES[tt].SIMP[s].COMP[c].find(']')]) | ||
196 | + replacer = MEMORIAB[0].TEXT[indice1:indice2] | ||
197 | + #print "replaced: " + replaced | ||
198 | + #print "replacer: " + replacer | ||
199 | + TEMPORALES[tt].TEXT = TEMPORALES[tt].TEXT.replace(replaced,replacer) | ||
200 | + tt = tt + 1 | ||
201 | + #copiar simplificaciones de memoria temporal a MEMORIAB | ||
202 | + indtempamem = 0 | ||
203 | + while indtempamem < len(TEMPORALES) : | ||
204 | + MEMORIAB.append(Sentence()) | ||
205 | + MEMORIAB[-1]=copy.deepcopy(TEMPORALES[indtempamem]) | ||
206 | + MEMORIAB[-1].FLAG = True | ||
207 | + #print MEMORIAB[-1].TEXT | ||
208 | + indtempamem = indtempamem + 1 | ||
209 | + elif 'apposition' in MEMORIAB[t].SIMP[s].TYPE: | ||
210 | + #print "Aplico regla Apposition" | ||
211 | + TEMPORALES = [] | ||
212 | + c = len(MEMORIAB[t].SIMP[s].COMP) | ||
213 | + #print "Hay " + str(c) + " COMP en este SIMP" | ||
214 | + tt = 0 | ||
215 | + while c > 0 : | ||
216 | + #print "entro al while de par" | ||
217 | + c = c - 1 | ||
218 | + TEMPORALES.append(Sentence()) | ||
219 | + TEMPORALES[tt] = copy.deepcopy(MEMORIAB[t]) | ||
220 | + replaced = MEMORIAB[0].SIMP[s].TEXT | ||
221 | + indice1 = (int)(TEMPORALES[tt].SIMP[s].COMP[c][TEMPORALES[tt].SIMP[s].COMP[c].find('[')+1:TEMPORALES[tt].SIMP[s].COMP[c].find('..')]) | ||
222 | + indice2 = (int)(TEMPORALES[tt].SIMP[s].COMP[c][TEMPORALES[tt].SIMP[s].COMP[c].find('..')+2:TEMPORALES[tt].SIMP[s].COMP[c].find(']')]) | ||
223 | + replacer = MEMORIAB[0].TEXT[indice1:indice2] | ||
224 | + #print "replaced: " + replaced | ||
225 | + #print "replacer: " + replacer | ||
226 | + TEMPORALES[tt].TEXT = TEMPORALES[tt].TEXT.replace(replaced,replacer) | ||
227 | + tt = tt + 1 | ||
228 | + #copiar simplificaciones de memoria temporal a MEMORIAB | ||
229 | + indtempamem = 0 | ||
230 | + while indtempamem < len(TEMPORALES) : | ||
231 | + MEMORIAB.append(Sentence()) | ||
232 | + MEMORIAB[-1]=copy.deepcopy(TEMPORALES[indtempamem]) | ||
233 | + MEMORIAB[-1].FLAG = True | ||
234 | + #print "Copio a memoria: " + MEMORIAB[-1].TEXT | ||
235 | + indtempamem = indtempamem + 1 | ||
236 | + elif 'member-collection' in MEMORIAB[t].SIMP[s].TYPE: | ||
237 | + #print "Aplico regla member-collection" | ||
238 | + TEMPORALES = [] | ||
239 | + c = len(MEMORIAB[t].SIMP[s].COMP) | ||
240 | + #print "Hay " + str(c) + " COMP en este SIMP" | ||
241 | + tt = 0 | ||
242 | + while c > 0 : | ||
243 | + #print "entro al while de mem" | ||
244 | + c = c - 1 | ||
245 | + TEMPORALES.append(Sentence()) | ||
246 | + TEMPORALES[tt] = copy.deepcopy(MEMORIAB[t]) | ||
247 | + replaced = MEMORIAB[0].SIMP[s].TEXT | ||
248 | + indice1 = (int)(TEMPORALES[tt].SIMP[s].COMP[c][TEMPORALES[tt].SIMP[s].COMP[c].find('[')+1:TEMPORALES[tt].SIMP[s].COMP[c].find('..')]) | ||
249 | + indice2 = (int)(TEMPORALES[tt].SIMP[s].COMP[c][TEMPORALES[tt].SIMP[s].COMP[c].find('..')+2:TEMPORALES[tt].SIMP[s].COMP[c].find(']')]) | ||
250 | + replacer = MEMORIAB[0].TEXT[indice1:indice2] | ||
251 | + #print "replaced: " + replaced | ||
252 | + #print "replacer: " + replacer | ||
253 | + TEMPORALES[tt].TEXT = TEMPORALES[tt].TEXT.replace(replaced,replacer) | ||
254 | + tt = tt + 1 | ||
255 | + #copiar simplificaciones de memoria temporal a MEMORIAB | ||
256 | + indtempamem = 0 | ||
257 | + while indtempamem < len(TEMPORALES) : | ||
258 | + MEMORIAB.append(Sentence()) | ||
259 | + MEMORIAB[-1]=copy.deepcopy(TEMPORALES[indtempamem]) | ||
260 | + MEMORIAB[-1].FLAG = True | ||
261 | + #print "Copio a memoria: " + MEMORIAB[-1].TEXT | ||
262 | + indtempamem = indtempamem + 1 | ||
263 | + elif 'sentence coordination' in MEMORIAB[t].SIMP[s].TYPE: | ||
264 | + #print "Aplico regla Verb" | ||
265 | + TEMPORALES = [] | ||
266 | + c = len(MEMORIAB[t].SIMP[s].COMP) | ||
267 | + #print "Hay " + str(c) + " COMP en este SIMP" | ||
268 | + tt = 0 | ||
269 | + while c > 0 : | ||
270 | + c = c - 1 | ||
271 | + if ( 'conjunct' in MEMORIAB[0].SIMP[s].COMP[c] ) and ( not ( 'conjunction' in MEMORIAB[0].SIMP[s].COMP[c] ) ) : | ||
272 | + TEMPORALES.append(Sentence()) | ||
273 | + TEMPORALES[tt] = copy.deepcopy(MEMORIAB[t]) | ||
274 | + #sustituye todo el contenido de TEMPORAL.r/TREE, por el contenido la oracion coordinada | ||
275 | + #replaced = MEMORIAB[0].SIMP[s].TEXT | ||
276 | + indice1 = (int)(TEMPORALES[tt].SIMP[s].COMP[c][TEMPORALES[tt].SIMP[s].COMP[c].find('[')+1:TEMPORALES[tt].SIMP[s].COMP[c].find('..')]) | ||
277 | + indice2 = (int)(TEMPORALES[tt].SIMP[s].COMP[c][TEMPORALES[tt].SIMP[s].COMP[c].find('..')+2:TEMPORALES[tt].SIMP[s].COMP[c].find(']')]) | ||
278 | + replacer = MEMORIAB[0].TEXT[indice1:indice2] | ||
279 | + #print replacer | ||
280 | + TEMPORALES[tt].TEXT = replacer | ||
281 | + ## si la oracion no termina en punto o ! | ||
282 | + tt = tt + 1 | ||
283 | + #copiar simplificaciones de memoria temporal a MEMORIAB | ||
284 | + indtempamem = 0 | ||
285 | + while indtempamem < len(TEMPORALES) : | ||
286 | + MEMORIAB.append(Sentence()) | ||
287 | + MEMORIAB[-1]=copy.deepcopy(TEMPORALES[indtempamem]) | ||
288 | + MEMORIAB[-1].FLAG = True | ||
289 | + #print MEMORIAB[-1].TEXT | ||
290 | + indtempamem = indtempamem + 1 | ||
291 | + elif 'full relative clause' in MEMORIAB[t].SIMP[s].TYPE: | ||
292 | + #print "Aplico regla RelCl" | ||
293 | + TEMPORALES = [] | ||
294 | + c = 0 | ||
295 | + tt = 0 | ||
296 | + while c < 2 : | ||
297 | + if 'referred noun phrase' in MEMORIAB[0].SIMP[s].COMP[c] : | ||
298 | + TEMPORALES.append(Sentence()) | ||
299 | + TEMPORALES[tt] = copy.deepcopy(MEMORIAB[t]) #ok | ||
300 | + if MEMORIAB[0].TEXT[MEMORIAB[0].TEXT.index(TEMPORALES[tt].SIMP[s].TEXT)+len(TEMPORALES[tt].SIMP[s].TEXT)-1] == ',': | ||
301 | + replaced = MEMORIAB[0].SIMP[s].TEXT + ',' #posible error, si es asi probar con ' ,' | ||
302 | + else: | ||
303 | + replaced = MEMORIAB[0].SIMP[s].TEXT | ||
304 | + indice1 = (int)(TEMPORALES[tt].SIMP[s].COMP[c][TEMPORALES[tt].SIMP[s].COMP[c].find('[')+1:TEMPORALES[tt].SIMP[s].COMP[c].find('..')]) | ||
305 | + indice2 = (int)(TEMPORALES[tt].SIMP[s].COMP[c][TEMPORALES[tt].SIMP[s].COMP[c].find('..')+2:TEMPORALES[tt].SIMP[s].COMP[c].find(']')]) | ||
306 | + replacer = MEMORIAB[0].TEXT[indice1:indice2] | ||
307 | + TEMPORALES[tt].TEXT = TEMPORALES[tt].TEXT.replace(replaced,replacer) | ||
308 | + indice3 = indice1 | ||
309 | + indice4 = indice2 | ||
310 | + if 'clause' in MEMORIAB[0].SIMP[s].COMP[c] : | ||
311 | + TEMPORALES.append(Sentence()) | ||
312 | + TEMPORALES[tt] = copy.deepcopy(MEMORIAB[t]) #ok | ||
313 | + indice1 = (int)(TEMPORALES[tt].SIMP[s].COMP[c][TEMPORALES[tt].SIMP[s].COMP[c].find('[')+1:TEMPORALES[tt].SIMP[s].COMP[c].find('..')]) | ||
314 | + indice2 = (int)(TEMPORALES[tt].SIMP[s].COMP[c][TEMPORALES[tt].SIMP[s].COMP[c].find('..')+2:TEMPORALES[tt].SIMP[s].COMP[c].find(']')]) | ||
315 | + TEMPORALES[tt].TEXT = copy.deepcopy(MEMORIAB[0].TEXT[indice3:indice4]+' '+MEMORIAB[0].TEXT[indice1:indice2] ) ## | ||
316 | + cad3 = MEMORIAB[0].TEXT[indice1:indice2] | ||
317 | + cad4 = cad3.split() | ||
318 | + if (cad4[0]+'_WDT') in frase.POS: | ||
319 | + TEMPORALES[tt].TEXT = TEMPORALES[tt].TEXT.replace(' '+cad4[0],'') | ||
320 | + tt = tt + 1 | ||
321 | + c = c + 1 | ||
322 | + #copiar simplificaciones de memoria temporal a MEMORIAB | ||
323 | + indtempamem = 0 | ||
324 | + while indtempamem < len(TEMPORALES) : | ||
325 | + MEMORIAB.append(Sentence()) | ||
326 | + MEMORIAB[-1]=copy.deepcopy(TEMPORALES[indtempamem]) | ||
327 | + MEMORIAB[-1].FLAG = True | ||
328 | + #print MEMORIAB[-1].TEXT | ||
329 | + indtempamem = indtempamem + 1 | ||
330 | + elif 'reduced relative clause' in MEMORIAB[t].SIMP[s].TYPE: | ||
331 | + #print "Aplico regla RelCl" | ||
332 | + TEMPORALES = [] | ||
333 | + c = 0 | ||
334 | + tt = 0 | ||
335 | + while c < 2 : | ||
336 | + if 'referred noun phrase' in MEMORIAB[0].SIMP[s].COMP[c] : | ||
337 | + TEMPORALES.append(Sentence()) | ||
338 | + TEMPORALES[tt] = copy.deepcopy(MEMORIAB[t]) #ok | ||
339 | + replaced = MEMORIAB[0].SIMP[s].TEXT | ||
340 | + indice1 = (int)(TEMPORALES[tt].SIMP[s].COMP[c][TEMPORALES[tt].SIMP[s].COMP[c].find('[')+1:TEMPORALES[tt].SIMP[s].COMP[c].find('..')]) | ||
341 | + indice2 = (int)(TEMPORALES[tt].SIMP[s].COMP[c][TEMPORALES[tt].SIMP[s].COMP[c].find('..')+2:TEMPORALES[tt].SIMP[s].COMP[c].find(']')]) | ||
342 | + replacer = MEMORIAB[0].TEXT[indice1:indice2] | ||
343 | + #subj = MEMORIAB[0].TEXT[indice1:(indice2+1)] | ||
344 | + subj = MEMORIAB[0].TEXT[indice1:(indice2)] | ||
345 | + TEMPORALES[tt].TEXT = TEMPORALES[tt].TEXT.replace(replaced,replacer) | ||
346 | + if 'clause' in MEMORIAB[0].SIMP[s].COMP[c] : | ||
347 | + TEMPORALES.append(Sentence()) | ||
348 | + TEMPORALES[tt] = copy.deepcopy(MEMORIAB[t]) #el referente debera estar antes que la clausula para tener orden correcto | ||
349 | + indice1 = (int)(TEMPORALES[tt].SIMP[s].COMP[c][TEMPORALES[tt].SIMP[s].COMP[c].find('[')+1:TEMPORALES[tt].SIMP[s].COMP[c].find('..')]) | ||
350 | + indice2 = (int)(TEMPORALES[tt].SIMP[s].COMP[c][TEMPORALES[tt].SIMP[s].COMP[c].find('..')+2:TEMPORALES[tt].SIMP[s].COMP[c].find(']')]) | ||
351 | + replacer = MEMORIAB[0].TEXT[indice1:indice2] | ||
352 | + TEMPORALES[tt].TEXT = subj + " _ " + replacer #en este punto para ingresar copula necesitas info de numero y tiempo | ||
353 | + tt = tt + 1 | ||
354 | + c = c + 1 | ||
355 | + #copiar simplificaciones de memoria temporal a MEMORIAB | ||
356 | + indtempamem = 0 | ||
357 | + while indtempamem < len(TEMPORALES) : | ||
358 | + MEMORIAB.append(Sentence()) | ||
359 | + MEMORIAB[-1]=copy.deepcopy(TEMPORALES[indtempamem]) | ||
360 | + MEMORIAB[-1].FLAG = True | ||
361 | + #print MEMORIAB[-1].TEXT | ||
362 | + indtempamem = indtempamem + 1 | ||
363 | + elif 'hypernymy' in MEMORIAB[t].SIMP[s].TYPE: | ||
364 | + print "**hypernymy detected**" | ||
365 | + #print "True to: " + MEMORIAB[t].TEXT | ||
366 | + MEMORIAB[t].FLAG = True | ||
367 | + else: | ||
368 | + print "Error: Unknown simplification construct detected." | ||
369 | + #print "True to: " + MEMORIAB[t].TEXT | ||
370 | + MEMORIAB[t].FLAG = True | ||
371 | + t = t + 1 | ||
372 | + s = s + 1 | ||
373 | + | ||
374 | + #CONDICIONES PARA IMPRESION DE SIMPLIFICACIONES EN ARCHIVO DE TEXTO | ||
375 | + print "Sentence simplificated. New sentences generated:" | ||
376 | + for i in range(len(MEMORIAB)): | ||
377 | + #se reutiliza flag para marcar las oraciones finales | ||
378 | + MEMORIAB[i].FLAG = True | ||
379 | + for j in range(len(MEMORIAB[0].SIMP)): | ||
380 | + #NOTA: si se agrega un constructo simplificable, anadirlo tambien a esta lista: | ||
381 | + if ( ('member-collection' in MEMORIAB[0].SIMP[j].TYPE) or ('apposition' in MEMORIAB[0].SIMP[j].TYPE) or ('coordination' in MEMORIAB[0].SIMP[j].TYPE) or ('parenthesis' in MEMORIAB[0].SIMP[j].TYPE) or ('sentence coordination' in MEMORIAB[0].SIMP[j].TYPE) or ('full relative clause' in MEMORIAB[0].SIMP[j].TYPE) or ('reduced relative clause' in MEMORIAB[0].SIMP[j].TYPE) ) and (MEMORIAB[0].SIMP[j].TEXT in MEMORIAB[i].TEXT) : | ||
382 | + MEMORIAB[i].FLAG = False | ||
383 | + | ||
384 | + ##areglar numeracion archivos salida ej 011 | ||
385 | + arcsalnum = 0 | ||
386 | + for i in range(len(MEMORIAB)): | ||
387 | + if MEMORIAB[i].FLAG == True: | ||
388 | + arcsalnum = arcsalnum + 1 | ||
389 | + length = len(str(arcsalnum)) | ||
390 | + #print('{:03d}'.format(arcsalnum)) # python >= 2.7 + python3 | ||
391 | +# >>> n = '4' | ||
392 | +#>>> print n.zfill(3) | ||
393 | + arcsalnum = 0 | ||
394 | + for i in range(len(MEMORIAB)): | ||
395 | + if MEMORIAB[i].FLAG == True: | ||
396 | + arcsalnum = arcsalnum + 1 | ||
397 | + print MEMORIAB[i].TEXT#Salida | ||
398 | + archSalNombre = sys.argv[2] | ||
399 | + archSalNombre=archSalNombre[:-4] + "-" + (str(arcsalnum)).zfill(length) + '.alg' | ||
400 | + archivoSalida=open(archSalNombre,"w") | ||
401 | + archivoSalida.write(MEMORIAB[i].TEXT+"\n")## | ||
402 | + archivoSalida.close() | ||
403 | + #WRITE OUTPUT FILE PATH TO INDEX (Arg 3) | ||
404 | + index_name = sys.argv[3] | ||
405 | + index = open(index_name, "a+") | ||
406 | + archSalNombreforIndex=archSalNombre + "\n" | ||
407 | + index.write(archSalNombreforIndex) | ||
408 | + index.close() | ||
409 | +else: | ||
410 | + print frase.TEXT #----Salida si no habia constructos simplificables | ||
411 | + archSalNombre = sys.argv[2] | ||
412 | + archSalNombre = archSalNombre[:-4] + ".alg" | ||
413 | + archivoSalida = open(archSalNombre,"a+") | ||
414 | + archivoSalida.write(frase.TEXT+"\n")## | ||
415 | + archivoSalida.close() | ||
416 | + #WRITE OUTPUT FILE PATH TO INDEX (Arg 3) | ||
417 | + index_name = sys.argv[3] | ||
418 | + index = open(index_name, "a+") | ||
419 | + archSalNombreforIndex=archSalNombre + "\n" | ||
420 | + index.write(archSalNombreforIndex) | ||
421 | + index.close() | ||
422 | + | ||
423 | + | ||
424 | +#FIN |
-
Please register or login to post a comment