Showing 17 changed files with 1388 additions and 1157 deletions
@@ -34,7 +34,7 @@ echo
 echo
 echo "Add sentence-end-tag PGCGROWTHCONDITIONS"
 #cext=$(grep -E ".*" $(cat $index | tr '\n' ' ')| sed 's/"//g'| sed 's/.tsv:/.tsv\t/' | tr '/' '\t'| cut -f8,9 | sort | uniq | awk 'BEGIN {FS="\t"} length($2) > 3 { print $_}' | sed 's/\\null\\/null/g'| sed 's/.tsv//g' | sed 's/-/\t/' | sed 's/-/\t/' )
-cext=$(grep -E ".*" $(cat $index | tr '\n' ' ') | sed 's/\//\t/7'| cut -f2-3 | sed 's/-/\t/' | sed 's/-/\t/' | sed 's/.tsv:/\t/' | sed 's/\"//g' | sed 's/1.\tNeubauer//'| sed 's/\\null\\/null/g' | sort | uniq)
+cext=$(grep -E ".*" $(cat $index | tr '\n' ' ') | sed 's/\//\t/7' | sed 's/1.\tNeubauer//' | cut -f2-3 | sed 's/-/\t/' | sed 's/-/\t/' | sed 's/.tsv:/\t/' | sed 's/\"//g'|sed 's/\\null\\//g' | sort | uniq)
 echo "$cext" | cut -f4 | awk '{ print $_ " PGCGROWTHCONDITIONS" }' > $output
 wc $output
 echo "$cext" | cut -f1-3,5 > $mapping
This diff could not be displayed because it is too large.
This file is too large to display.
@@ -60,8 +60,10 @@ import training_validation_v14 as training
 # --infoFile bg_sentences_midx.txt
 # --variant 13
 
+#Examples
 #python3 tagging.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/input/ --outputPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/output/ --outputFileI annot-input_bg_outputI.txt --outputFileII annot-input_bg_outputII.txt --modelPath /home/egaytan/automatic-extraction-growth-conditions/CRF/models --modelName model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10 --infoPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/mapping --infoFile bg_sentences_midx.txt --variant 13 --S4 --S1 > ../../reports/output_tagging_report.txt
 #python3 predict-annot/bin/tagging/tagging.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/input/ --outputPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/output/ --outputFileI annot-input_bg_outputI_v4.txt --outputFileII annot-input_bg_outputII_v4 --modelPath /home/egaytan/automatic-extraction-growth-conditions/CRF/models --modelName model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10 --infoPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/mapping --infoFile bg_sentences_midx_v4.txt --variant 13 --S4 --S1 > predict-annot/reports/output_tagging_report_v4.txt
+#python3 predict-annot/bin/tagging/tagging.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/input/ --outputPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/output/ --outputFileI annot-input_bg_outputI_v4.txt --outputFileII annot-input_bg_outputII_v4 --outputFileIII annot-input_bg_outputIII_v4 --modelPath /home/egaytan/automatic-extraction-growth-conditions/CRF/models --modelName model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10 --infoPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/mapping --infoFile bg_sentences_midx_v4.txt --variant 13 --S4 --S1 > predict-annot/reports/annot-input_bg_report_v4.txt
 
 __author__ = 'egaytan'
 
@@ -70,12 +72,13 @@ __author__ = 'egaytan'
 ##########################################
 
 if __name__ == "__main__":
-    # Defining parameters
+    ########################################### Defining parameters ##########################################
    parser = OptionParser()
     parser.add_option("--inputPath", dest="inputPath", help="Path of training data set", metavar="PATH")
     parser.add_option("--outputPath", dest="outputPath", help="Output path to place output files", metavar="PATH")
     parser.add_option("--outputFileI", dest="outFileI", help="Output tagged file I", metavar="FILE")
     parser.add_option("--outputFileII", dest="outFileII", help="Output tagged file II", metavar="FILE")
+    parser.add_option("--outputFileIII", dest="outFileIII", help="Output tagged file III", metavar="FILE")
     parser.add_option("--modelPath", dest="modelPath", help="Path to read CRF model", metavar="PATH")
     parser.add_option("--modelName", dest="modelName", help="Model name", metavar="TEXT")
     parser.add_option("--infoPath", dest="infoPath", help="Path of GSE-GSM index file", metavar="PATH")
@@ -93,13 +96,14 @@ if __name__ == "__main__":
         parser.error("Any parameter given.")
         sys.exit(1)
 
-
+    ########################################### DISP PARAMETERS ##########################################
     print('-------------------------------- PARAMETERS --------------------------------')
 
     print("--inputPath Path of training data set : " + str(options.inputPath ))
     print("--outputPath Output path to place output files: " + str(options.outputPath ))
     print("--outputFileI Output tagged file I : " + str(options.outFileI ))
     print("--outputFileII Output tagged file II : " + str(options.outFileII ))
+    print("--outputFileIII Output tagged file III : " + str(options.outFileIII ))
     print("--modelPath Path to read CRF model : " + str(options.modelPath ))
     print("--modelName Model name : " + str(options.modelName ))
     print("--infoPath Path of GSE-GSM index file : " + str(options.infoPath ))
@@ -115,25 +119,29 @@ if __name__ == "__main__":
 
     symbols = ['.', ',', ':', ';', '?', '!', '\'', '"', '<', '>', '(', ')', '-', '_', '/', '\\', '¿', '¡', '+', '{',
                '}', '[', ']', '*', '%', '$', '#', '&', '°', '`', '...']
-
-    print("Filtering symbols " + str(symbols) + ': ' + str(options.filterSymbols))
-
+    #print("Filtering symbols " + str(symbols) + ': ' + str(options.filterSymbols))
+    ########################################### PROCESSING ##########################################
     print('-------------------------------- PROCESSING --------------------------------')
 
     stopwords = [word for word in stopwords.words('english')]
-    # Read index
+    # Read index mapping GSE file information
     idx = open(os.path.join(options.infoPath, options.idx), "r").readlines()
 
-    # Read CRF model
+
+    ########################################### Read CRF model ##########################################
     t0 = time()
     print('Reading CRF model...')
     crf = joblib.load(os.path.join(options.modelPath, options.modelName + '.mod'))
     print("Reading CRF model done in: %fs" % (time() - t0))
 
-    # Reading sentences
+
+    ########################################### Reading sentences ##########################################
     print('Processing corpus...')
     t0 = time()
     labels = list(['Gtype', 'Gversion', 'Med', 'Phase', 'Strain', 'Substrain', 'Supp', 'Technique', 'Temp', 'OD', 'Anti', 'Agit', 'Air', 'Vess', 'pH'])
+
+
+    #####################################################################################
     # Walk directory to read files
     for path, dirs, files in os.walk(options.inputPath):
         # For each file in dir
@@ -165,25 +173,24 @@ if __name__ == "__main__":
             print("Sentences input data: " + str(len(sentencesInputData)))
 
 
-            # Predicting tags
+            ########################################### Predicting tags ###########################################
             t1 = time()
-            print("Predicting tags with model")
+            print("Predicting tags with model...")
             y_pred = crf.predict(X_input)
             print("Prediction done in: %fs" % (time() - t1))
 
 
-            # Tagging with CRF model
-            print("Tagging file")
+            ########################################### Tagging with CRF model ###########################################
+            print("Tagging file...")
             lidx = 0
             for line, tagLine in zip(lines, y_pred):
                 Ltags = set(labels).intersection(set(tagLine))
                 outputLine = ''
                 line = line.strip('\n')
-
-                #print("\nLine: " + str(line))
-                #print ("CRF tagged line: " + str(tagLine))
+
                 tb = 'O'
                 i = 0
+                ########################## one word sentences ##########################
                 if len(tagLine)==1:
                     if tagLine[0] in labels:
                         start = '<' + tagLine[0] + '> '
@@ -192,9 +199,11 @@ if __name__ == "__main__":
                         outputLine = start + word + end
                     else:
                         outputLine = line.split(' ')[0]
-                    #print(outputLine + '\t' + ', '.join(Ltags))
-                    sentencesOutputDataI.append(idx[lidx].replace('\n','\t') + outputLine + ', '.join(Ltags))
+                    ########################## Saving Sentence Output I ##########################
+                    sentencesOutputDataI.append(idx[lidx].replace('\n','\t') + outputLine + '\t' + ', '.join(Ltags))
+                    ########################## Saving Sentence Output II ##########################
                     sentencesOutputDataII.append(idx[lidx].replace('\n', '\t') + word.split('|')[0] + '\t' + tag)
+                    lidx += 1
                     continue
 
                 sentence = ''
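The added lidx += 1 before continue appears to be the substantive fix in this hunk: without it, one-word sentences left the GSE-GSM index pointer where it was, so every later sentence was paired with the wrong metadata row. A minimal sketch of the alignment issue, with invented data (not taken from the repository):

# Illustrative only: advancing the index pointer on every path keeps the
# sentences aligned with their GSE-GSM metadata rows.
idx = ["GSE1\tGSM1", "GSE1\tGSM2", "GSE1\tGSM3"]
tagged = [["single|O"], ["two|O", "words|O"], ["three|O", "short|O", "words|O"]]

lidx = 0
for tag_line in tagged:
    if len(tag_line) == 1:          # one-word sentence: early continue
        print(idx[lidx], tag_line[0])
        lidx += 1                   # the added line: without it, the next
        continue                    # sentence would reuse this metadata row
    print(idx[lidx], ' '.join(tag_line))
    lidx += 1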
@@ -216,6 +225,7 @@ if __name__ == "__main__":
                         # end sentence
                         outputLine += word.split('|')[0] + ' '
                         outputLine += '</' + tag + '/> '
+                        ########################## Saving Sentence Output II ##########################
                         sentencesOutputDataII.append(idx[lidx].replace('\n', '\t') + sentence + word.split('|')[0] + '\t' +tag)
                         sb = False
                         tb = 'O'
@@ -225,6 +235,7 @@ if __name__ == "__main__":
                         # start new tag
                         outputLine += word.split('|')[0] + ' '
                         outputLine += '</' + tag + '/> '
+                        ########################## Saving Sentence Output II ##########################
                         sentencesOutputDataII.append(idx[lidx].replace('\n', '\t') + sentence + word.split('|')[0] + '\t' +tag)
                         sb = False
                         tb = 'O'
@@ -235,21 +246,32 @@ if __name__ == "__main__":
                     i += 1
                     if sb:
                         sentence+= word.split('|')[0] + ' '
-                #print(outputLine + '\t' + ', '.join(Ltags))
-                sentencesOutputDataI.append(idx[lidx].replace('\n', '\t') + outputLine+ ', '.join(Ltags))
-                lidx += 1
-
-            #print( DF(sentencesOutputDataI) )
-            #print( '\n'.join(sentencesOutputDataII) )
-            # Save tags
-            with open(os.path.join(options.outputPath, options.outFileII + '_' + options.modelName + '.tsv'), "w") as oFile:
-                for line in sentencesOutputDataII:
-                    #print(line)
-                    oFile.write(line + '\n')
+                ########################## Saving Sentence Output I ##########################
+                sentencesOutputDataI.append(idx[lidx].replace('\n', '\t') + outputLine+ '\t' +', '.join(Ltags))
+                lidx += 1
+
+            ########################################### Save Output I ##########################################
+            print("Saving Output I...")
             with open(os.path.join(options.outputPath, options.outFileI + '_' + options.modelName + '.tsv'), "w") as oFileI:
                 for line in sentencesOutputDataI:
                     if re.findall('</', line):
-                        print(line)
-                        #oFileI.write(line + '\n')
+                        #print(line)
+                        oline = line.replace('LDR','(')
+                        oline = oline.replace('RDR',')')
+                        oFileI.write(oline + '\n')
+            ########################################### Save Output II ##########################################
+            print("Saving Output II...")
+            with open(os.path.join(options.outputPath, options.outFileII + '_' + options.modelName + '.tsv'), "w") as oFileII:
+                for line in sentencesOutputDataII:
+                    #print(line)
+                    oline = line.replace('LDR','(')
+                    oline = oline.replace('RDR',')')
+                    oFileII.write(oline + '\n')
+            ########################################### Save Output III ##########################################
+            print("Saving Output III...")
+            with open(os.path.join(options.outputPath, options.outFileIII + '_' + options.modelName + '.tsv'), "w") as oFileIII:
+                for line, tagLine in zip(lines, y_pred):
+                    oline = [ w.split('|')[0].replace('LDR','(').replace('RDR',')')+'|'+tag for w,tag in zip(line.split(' '), tagLine)]
 
+                    oFileIII.write(' '.join(oline) + '\n')
             print("Processing corpus done in: %fs" % (time() - t0))
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
@@ -9328,7 +9328,7 @@ GSE12006 GSM303526 GPL3154-PMID:18940002 characteristics_ch1.1
 GSE12006 GSM303526 GPL3154-PMID:18940002 growth_protocol_ch1.1
 GSE12006 GSM303526 GPL3154-PMID:18940002 growth_protocol_ch1.2
 GSE12006 GSM303527 GPL3154-PMID:18940002 growth_protocol_ch1.4
-GSE12006 GSM303527 GPL3154-PMID:18940002
+GSE12006 GSM303527 GPL3154-PMID:18940002 extract_protocol_ch1.3
 GSE12006 GSM303527 GPL3154-PMID:18940002 title.1
 GSE12006 GSM303527 GPL3154-PMID:18940002 source_name_ch1.1
 GSE12006 GSM303527 GPL3154-PMID:18940002 organism_ch1.1
@@ -9340,7 +9340,7 @@ GSE12006 GSM303527 GPL3154-PMID:18940002 characteristics_ch1.1
 GSE12006 GSM303527 GPL3154-PMID:18940002 growth_protocol_ch1.1
 GSE12006 GSM303527 GPL3154-PMID:18940002 growth_protocol_ch1.2
 GSE12006 GSM303528 GPL3154-PMID:18940002 growth_protocol_ch1.4
-GSE12006 GSM303528 GPL3154-PMID:18940002
+GSE12006 GSM303528 GPL3154-PMID:18940002 extract_protocol_ch1.3
 GSE12006 GSM303528 GPL3154-PMID:18940002 title.1
 GSE12006 GSM303528 GPL3154-PMID:18940002 source_name_ch1.1
 GSE12006 GSM303528 GPL3154-PMID:18940002 organism_ch1.1
@@ -9352,7 +9352,7 @@ GSE12006 GSM303528 GPL3154-PMID:18940002 characteristics_ch1.1
 GSE12006 GSM303528 GPL3154-PMID:18940002 growth_protocol_ch1.1
 GSE12006 GSM303528 GPL3154-PMID:18940002 growth_protocol_ch1.2
 GSE12006 GSM303529 GPL3154-PMID:18940002 growth_protocol_ch1.4
-GSE12006 GSM303529 GPL3154-PMID:18940002
+GSE12006 GSM303529 GPL3154-PMID:18940002 extract_protocol_ch1.3
 GSE12006 GSM303529 GPL3154-PMID:18940002 title.1
 GSE12006 GSM303529 GPL3154-PMID:18940002 source_name_ch1.1
 GSE12006 GSM303529 GPL3154-PMID:18940002 organism_ch1.1
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
File mode changed
This diff could not be displayed because it is too large.
@@ -1,8 +0,0 @@
--------------------------------- PARAMETERS --------------------------------
-Path of CoreNLP output: /home/egaytan/automatic-extraction-growth-conditions/CoreNLP/output/annotation
-File with CoreNLP-tagging bg-sentences: bg_sentences_v3.txt.ner
-Path to save data set: /home/egaytan/automatic-extraction-growth-conditions/predict-annot/input
-File to save recontrsucted bg-sentences: annot-input_bg_v3.txt
--------------------------------- PROCESSING --------------------------------
-Number of sentences: 14716
-==================================END===================================
@@ -1,8 +1,30 @@
 -------------------------------- PARAMETERS --------------------------------
-Path of CoreNLP output: /home/egaytan/automatic-extraction-growth-conditions/CoreNLP/output/annotation
-File with CoreNLP-tagging bg-sentences: bg_sentences_v4.txt.ner
-Path to save data set: /home/egaytan/automatic-extraction-growth-conditions/predict-annot/input
-File to save recontrsucted bg-sentences: annot-input_bg_v4.txt
+--inputPath Path of training data set : /home/egaytan/automatic-extraction-growth-conditions/predict-annot/input/
+--outputPath Output path to place output files: /home/egaytan/automatic-extraction-growth-conditions/predict-annot/output/
+--outputFileI Output tagged file I : annot-input_bg_outputI_v4
+--outputFileII Output tagged file II : annot-input_bg_outputII_v4
+--outputFileIII Output tagged file III : annot-input_bg_outputIII_v4
+--modelPath Path to read CRF model : /home/egaytan/automatic-extraction-growth-conditions/CRF/models
+--modelName Model name : model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10
+--infoPath Path of GSE-GSM index file : /home/egaytan/automatic-extraction-growth-conditions/predict-annot/mapping
+--infoFile GSE-GSM index file : bg_sentences_midx_v4.txt
+--variant Run variant : 13
+--S1 General features : True
+--S2 Inner/Complete word features : False
+--S3 Extended context features : False
+--S4 Semantic features : True
+--filteringStopWords Filtering stop words : False
+--filterSymbols Filtering punctuation marks : False
 -------------------------------- PROCESSING --------------------------------
-Number of sentences: 90904
-==================================END===================================
+Reading CRF model...
+Reading CRF model done in: 0.009463s
+Processing corpus...
+Preprocessing file...annot-input_bg_v4.txt
+Sentences input data: 90688
+Predicting tags with model...
+Prediction done in: 26.367272s
+Tagging file...
+Saving Output I...
+Saving Output II...
+Saving Output III...
+Processing corpus done in: 56.584394s
@@ -1,27 +0,0 @@
--------------------------------- PARAMETERS --------------------------------
---inputPath Path of training data set : /home/egaytan/automatic-extraction-growth-conditions/predict-annot/input/
---outputPath Output path to place output files: /home/egaytan/automatic-extraction-growth-conditions/predict-annot/output/
---outputFileI Output tagged file I : annot-input_bg_outputI.txt
---outputFileII Output tagged file II : annot-input_bg_outputII.txt
---modelPath Path to read CRF model : /home/egaytan/automatic-extraction-growth-conditions/CRF/models
---modelName Model name : model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10
---infoPath Path of GSE-GSM index file : /home/egaytan/automatic-extraction-growth-conditions/predict-annot/mapping
---infoFile GSE-GSM index file : bg_sentences_midx.txt
---variant Run variant : 13
---S1 General features : True
---S2 Inner/Complete word features : False
---S3 Extended context features : False
---S4 Semantic features : True
---filteringStopWords Filtering stop words : False
---filterSymbols Filtering punctuation marks : False
-Filtering symbols ['.', ',', ':', ';', '?', '!', "'", '"', '<', '>', '(', ')', '-', '_', '/', '\\', '¿', '¡', '+', '{', '}', '[', ']', '*', '%', '$', '#', '&', '°', '`', '...']: False
--------------------------------- PROCESSING --------------------------------
-Reading CRF model...
-Reading CRF model done in: 0.008336s
-Processing corpus...
-Preprocessing file...annot-input_bg_v3.txt
-Sentences input data: 14716
-Predicting tags with model
-Prediction done in: 1.688127s
-Tagging file
-Processing corpus done in: 3.948320s
@@ -17,10 +17,16 @@
 Filtering symbols ['.', ',', ':', ';', '?', '!', "'", '"', '<', '>', '(', ')', '-', '_', '/', '\\', '¿', '¡', '+', '{', '}', '[', ']', '*', '%', '$', '#', '&', '°', '`', '...']: False
 -------------------------------- PROCESSING --------------------------------
 Reading CRF model...
-Reading CRF model done in: 0.009804s
+Reading CRF model done in: 0.009363s
 Processing corpus...
 Preprocessing file...annot-input_bg_v3.txt
 Sentences input data: 14716
 Predicting tags with model
-Prediction done in: 1.811103s
+Prediction done in: 1.737334s
 Tagging file
+Preprocessing file...annot-input_bg_v4.txt
+Sentences input data: 90688
+Predicting tags with model
+Prediction done in: 26.434549s
+Tagging file
+Processing corpus done in: 58.304885s