Author: Estefani Gaytan Nunez
Commit message: upload

@@ -34,7 +34,7 @@ echo
 echo
 echo "Add sentence-end-tag PGCGROWTHCONDITIONS"
 #cext=$(grep -E ".*" $(cat $index | tr '\n' ' ')| sed 's/"//g'| sed 's/.tsv:/.tsv\t/' | tr '/' '\t'| cut -f8,9 | sort | uniq | awk 'BEGIN {FS="\t"} length($2) > 3 { print $_}' | sed 's/\\null\\/null/g'| sed 's/.tsv//g' | sed 's/-/\t/' | sed 's/-/\t/' )
-cext=$(grep -E ".*" $(cat $index | tr '\n' ' ') | sed 's/\//\t/7'| cut -f2-3 | sed 's/-/\t/' | sed 's/-/\t/' | sed 's/.tsv:/\t/' | sed 's/\"//g' | sed 's/1.\tNeubauer//'| sed 's/\\null\\/null/g' | sort | uniq)
+cext=$(grep -E ".*" $(cat $index | tr '\n' ' ') | sed 's/\//\t/7' | sed 's/1.\tNeubauer//' | cut -f2-3 | sed 's/-/\t/' | sed 's/-/\t/' | sed 's/.tsv:/\t/' | sed 's/\"//g'|sed 's/\\null\\//g' | sort | uniq)
 echo "$cext" | cut -f4 | awk '{ print $_ " PGCGROWTHCONDITIONS" }' > $output
 wc $output
 echo "$cext" | cut -f1-3,5 > $mapping
......
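For orientation: the hunk above only moves the "1.<TAB>Neubauer" cleanup ahead of the column cut and now deletes the \null\ markers instead of rewriting them to "null". Below is a minimal Python sketch of the table the revised cext pipeline builds. It is illustrative only; the index directory, the GSE-GSM-GPL file-name pattern, and the column roles (column 4 = sentence, columns 1-3,5 = GSE/GSM/GPL/metadata field) are assumptions inferred from the cut -f4 / cut -f1-3,5 steps and from the mapping file shown further down, not statements about the real index layout.

# Hedged sketch of what the cext pipeline produces (illustrative; layout assumed).
import glob
import os

index_dir = "/path/to/extraction/tsv"   # hypothetical stand-in for the files listed in $index

output_lines = []    # -> $output : one sentence per line, ending in the PGCGROWTHCONDITIONS tag
mapping_lines = []   # -> $mapping: GSE<TAB>GSM<TAB>GPL<TAB>metadata-field, one row per sentence

for tsv_path in glob.glob(os.path.join(index_dir, "*.tsv")):
    # assumed file names like GSE12006-GSM303527-GPL3154-PMID:18940002.tsv
    gse, gsm, gpl = os.path.basename(tsv_path)[: -len(".tsv")].split("-", 2)
    with open(tsv_path) as fh:
        for raw in fh:
            field, _, sentence = raw.rstrip("\n").replace('"', "").partition("\t")
            sentence = sentence.replace("\\null\\", "")          # same cleanup as the new sed step
            output_lines.append(sentence + " PGCGROWTHCONDITIONS")
            mapping_lines.append("\t".join((gse, gsm, gpl, field)))
# (the shell version additionally pipes the combined table through sort | uniq)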
This diff could not be displayed because it is too large.
@@ -60,8 +60,10 @@ import training_validation_v14 as training
 # --infoFile bg_sentences_midx.txt
 # --variant 13

+# Examples
 #python3 tagging.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/input/ --outputPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/output/ --outputFileI annot-input_bg_outputI.txt --outputFileII annot-input_bg_outputII.txt --modelPath /home/egaytan/automatic-extraction-growth-conditions/CRF/models --modelName model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10 --infoPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/mapping --infoFile bg_sentences_midx.txt --variant 13 --S4 --S1 > ../../reports/output_tagging_report.txt
 #python3 predict-annot/bin/tagging/tagging.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/input/ --outputPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/output/ --outputFileI annot-input_bg_outputI_v4.txt --outputFileII annot-input_bg_outputII_v4 --modelPath /home/egaytan/automatic-extraction-growth-conditions/CRF/models --modelName model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10 --infoPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/mapping --infoFile bg_sentences_midx_v4.txt --variant 13 --S4 --S1 > predict-annot/reports/output_tagging_report_v4.txt
+#python3 predict-annot/bin/tagging/tagging.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/input/ --outputPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/output/ --outputFileI annot-input_bg_outputI_v4.txt --outputFileII annot-input_bg_outputII_v4 --outputFileIII annot-input_bg_outputIII_v4 --modelPath /home/egaytan/automatic-extraction-growth-conditions/CRF/models --modelName model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10 --infoPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/mapping --infoFile bg_sentences_midx_v4.txt --variant 13 --S4 --S1 > predict-annot/reports/annot-input_bg_report_v4.txt

 __author__ = 'egaytan'

@@ -70,12 +72,13 @@ __author__ = 'egaytan'
 ##########################################

 if __name__ == "__main__":
-# Defining parameters
+########################################### Defining parameters ##########################################
 parser = OptionParser()
 parser.add_option("--inputPath", dest="inputPath", help="Path of training data set", metavar="PATH")
 parser.add_option("--outputPath", dest="outputPath", help="Output path to place output files", metavar="PATH")
 parser.add_option("--outputFileI", dest="outFileI", help="Output tagged file I", metavar="FILE")
 parser.add_option("--outputFileII", dest="outFileII", help="Output tagged file II", metavar="FILE")
+parser.add_option("--outputFileIII", dest="outFileIII", help="Output tagged file III", metavar="FILE")
 parser.add_option("--modelPath", dest="modelPath", help="Path to read CRF model", metavar="PATH")
 parser.add_option("--modelName", dest="modelName", help="Model name", metavar="TEXT")
 parser.add_option("--infoPath", dest="infoPath", help="Path of GSE-GSM index file", metavar="PATH")
@@ -93,13 +96,14 @@ if __name__ == "__main__":
 parser.error("Any parameter given.")
 sys.exit(1)

-
+########################################### DISP PARAMETERS ##########################################
 print('-------------------------------- PARAMETERS --------------------------------')

 print("--inputPath Path of training data set : " + str(options.inputPath ))
 print("--outputPath Output path to place output files: " + str(options.outputPath ))
 print("--outputFileI Output tagged file I : " + str(options.outFileI ))
 print("--outputFileII Output tagged file II : " + str(options.outFileII ))
+print("--outputFileII Output tagged file III : " + str(options.outFileIII ))
 print("--modelPath Path to read CRF model : " + str(options.modelPath ))
 print("--modelName Model name : " + str(options.modelName ))
 print("--infoPath Path of GSE-GSM index file : " + str(options.infoPath ))
@@ -115,25 +119,29 @@ if __name__ == "__main__":

 symbols = ['.', ',', ':', ';', '?', '!', '\'', '"', '<', '>', '(', ')', '-', '_', '/', '\\', '¿', '¡', '+', '{',
 '}', '[', ']', '*', '%', '$', '#', '&', '°', '`', '...']
-
-print("Filtering symbols " + str(symbols) + ': ' + str(options.filterSymbols))
-
+#print("Filtering symbols " + str(symbols) + ': ' + str(options.filterSymbols))
+########################################### PROCESSING ##########################################
 print('-------------------------------- PROCESSING --------------------------------')

 stopwords = [word for word in stopwords.words('english')]
-# Read index
+# Read index mapping GSE file information
 idx = open(os.path.join(options.infoPath, options.idx), "r").readlines()

-# Read CRF model
+
+########################################### Read CRF model ##########################################
 t0 = time()
 print('Reading CRF model...')
 crf = joblib.load(os.path.join(options.modelPath, options.modelName + '.mod'))
 print("Reading CRF model done in: %fs" % (time() - t0))

-# Reading sentences
+
+########################################### Reading sentences ##########################################
 print('Processing corpus...')
 t0 = time()
 labels = list(['Gtype', 'Gversion', 'Med', 'Phase', 'Strain', 'Substrain', 'Supp', 'Technique', 'Temp', 'OD', 'Anti', 'Agit', 'Air', 'Vess', 'pH'])
+
+
+#####################################################################################
 # Walk directory to read files
 for path, dirs, files in os.walk(options.inputPath):
 # For each file in dir
@@ -165,25 +173,24 @@ if __name__ == "__main__":
 print("Sentences input data: " + str(len(sentencesInputData)))


-# Predicting tags
+########################################### Predicting tags ###########################################
 t1 = time()
-print("Predicting tags with model")
+print("Predicting tags with model...")
 y_pred = crf.predict(X_input)
 print("Prediction done in: %fs" % (time() - t1))


-# Tagging with CRF model
-print("Tagging file")
+########################################### Tagging with CRF model ###########################################
+print("Tagging file...")
 lidx = 0
 for line, tagLine in zip(lines, y_pred):
 Ltags = set(labels).intersection(set(tagLine))
 outputLine = ''
 line = line.strip('\n')
-
-#print("\nLine: " + str(line))
-#print ("CRF tagged line: " + str(tagLine))
+
 tb = 'O'
 i = 0
+########################## one word sentences ##########################
 if len(tagLine)==1:
 if tagLine[0] in labels:
 start = '<' + tagLine[0] + '> '
@@ -192,9 +199,11 @@ if __name__ == "__main__":
 outputLine = start + word + end
 else:
 outputLine = line.split(' ')[0]
-#print(outputLine + '\t' + ', '.join(Ltags))
-sentencesOutputDataI.append(idx[lidx].replace('\n','\t') + outputLine + ', '.join(Ltags))
+########################## Saving Sentence Ouput I ##########################
+sentencesOutputDataI.append(idx[lidx].replace('\n','\t') + outputLine + '\t' + ', '.join(Ltags))
+########################## Saving Sentence Ouput II ##########################
 sentencesOutputDataII.append(idx[lidx].replace('\n', '\t') + word.split('|')[0] + '\t' + tag)
+lidx += 1
 continue

 sentence = ''
@@ -216,6 +225,7 @@ if __name__ == "__main__":
 # end sentence
 outputLine += word.split('|')[0] + ' '
 outputLine += '</' + tag + '/> '
+########################## Saving Sentence Ouput II ##########################
 sentencesOutputDataII.append(idx[lidx].replace('\n', '\t') + sentence + word.split('|')[0] + '\t' +tag)
 sb = False
 tb = 'O'
@@ -225,6 +235,7 @@ if __name__ == "__main__":
 # start new tag
 outputLine += word.split('|')[0] + ' '
 outputLine += '</' + tag + '/> '
+########################## Saving Sentence Ouput II ##########################
 sentencesOutputDataII.append(idx[lidx].replace('\n', '\t') + sentence + word.split('|')[0] + '\t' +tag)
 sb = False
 tb = 'O'
@@ -235,21 +246,32 @@ if __name__ == "__main__":
 i += 1
 if sb:
 sentence+= word.split('|')[0] + ' '
-#print(outputLine + '\t' + ', '.join(Ltags))
-sentencesOutputDataI.append(idx[lidx].replace('\n', '\t') + outputLine+ ', '.join(Ltags))
-lidx += 1
-
-#print( DF(sentencesOutputDataI) )
-#print( '\n'.join(sentencesOutputDataII) )
-# Save tags
-with open(os.path.join(options.outputPath, options.outFileII + '_' + options.modelName + '.tsv'), "w") as oFile:
-for line in sentencesOutputDataII:
-#print(line)
-oFile.write(line + '\n')
+########################## Saving Sentence Ouput I ##########################
+sentencesOutputDataI.append(idx[lidx].replace('\n', '\t') + outputLine+ '\t' +', '.join(Ltags))
+lidx += 1
+
+########################################### Save Output I ##########################################
+print("Saving Ouput I...")
 with open(os.path.join(options.outputPath, options.outFileI + '_' + options.modelName + '.tsv'), "w") as oFileI:
 for line in sentencesOutputDataI:
 if re.findall('</', line):
-print(line)
-#oFileI.write(line + '\n')
+#print(line)
+oline = line.replace('LDR','(')
+oline = oline.replace('RDR',')')
+oFileI.write(oline + '\n')
+########################################### Save Output II ##########################################
+print("Saving Ouput II...")
+with open(os.path.join(options.outputPath, options.outFileII + '_' + options.modelName + '.tsv'), "w") as oFileII:
+for line in sentencesOutputDataII:
+#print(line)
+oline = line.replace('LDR','(')
+oline = oline.replace('RDR',')')
+oFileII.write(oline + '\n')
+########################################### Save Output III ##########################################
+print("Saving Ouput III...")
+with open(os.path.join(options.outputPath, options.outFileIII + '_' + options.modelName + '.tsv'), "w") as oFileIII:
+for line, tagLine in zip(lines, y_pred):
+oline = [ w.split('|')[0].replace('LDR','(').replace('RDR',')')+'|'+tag for w,tag in zip(line.split(' '), tagLine)]

+oFileIII.write(' '.join(oline) + '\n')
 print("Processing corpus done in: %fs" % (time() - t0))
......
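Since the three output formats are easiest to see on a concrete line, here is a small, self-contained sketch of the conversion the tagging loop above performs. It is illustrative rather than the author's exact code: the word|lemma|pos token format, the <Tag> ... </Tag/> markup, the GSE-GSM index prefix, and the LDR/RDR bracket placeholders are taken from the diff, while the helper name tag_outputs, the simplified span grouping, and the example sentence are invented for illustration.

# Illustrative sketch of the three outputs written by tagging.py (simplified span handling).
def tag_outputs(index_row, token_line, tag_line, labels):
    words = [tok.split('|')[0] for tok in token_line.split(' ')]
    prefix = index_row.rstrip('\n') + '\t'              # GSE<TAB>GSM<TAB>GPL<TAB>field prefix

    # Output III: plain word|tag tokens
    out3 = ' '.join(w + '|' + t for w, t in zip(words, tag_line))

    # Outputs I and II: wrap runs of the same predicted label in <Tag> ... </Tag/>
    out1_parts, out2_rows, i = [], [], 0
    while i < len(words):
        tag = tag_line[i]
        if tag in labels:
            j = i
            while j < len(words) and tag_line[j] == tag:
                j += 1
            span = ' '.join(words[i:j])
            out1_parts.append('<' + tag + '> ' + span + ' </' + tag + '/>')
            out2_rows.append(prefix + span + '\t' + tag)
            i = j
        else:
            out1_parts.append(words[i])
            i += 1

    found = ', '.join(sorted(set(tag_line) & set(labels)))
    restore = lambda s: s.replace('LDR', '(').replace('RDR', ')')   # bracket placeholders
    out1 = restore(prefix + ' '.join(out1_parts) + '\t' + found)
    return out1, [restore(r) for r in out2_rows], out3

# Hypothetical example, reusing an index row of the kind shown in the mapping file below
row  = "GSE12006\tGSM303527\tGPL3154-PMID:18940002\tgrowth_protocol_ch1.1\n"
line = "M9|m9|NN minimal|minimal|JJ medium|medium|NN at|at|IN 37|37|CD C|c|NN"
tags = ['Med', 'Med', 'Med', 'O', 'Temp', 'Temp']
outI, outII, outIII = tag_outputs(row, line, tags, ['Med', 'Temp'])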
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
@@ -9328,7 +9328,7 @@ GSE12006 GSM303526 GPL3154-PMID:18940002 characteristics_ch1.1
 GSE12006 GSM303526 GPL3154-PMID:18940002 growth_protocol_ch1.1
 GSE12006 GSM303526 GPL3154-PMID:18940002 growth_protocol_ch1.2
 GSE12006 GSM303527 GPL3154-PMID:18940002 growth_protocol_ch1.4
-GSE12006 GSM303527 GPL3154-PMID:18940002
+GSE12006 GSM303527 GPL3154-PMID:18940002 extract_protocol_ch1.3
 GSE12006 GSM303527 GPL3154-PMID:18940002 title.1
 GSE12006 GSM303527 GPL3154-PMID:18940002 source_name_ch1.1
 GSE12006 GSM303527 GPL3154-PMID:18940002 organism_ch1.1
@@ -9340,7 +9340,7 @@ GSE12006 GSM303527 GPL3154-PMID:18940002 characteristics_ch1.1
 GSE12006 GSM303527 GPL3154-PMID:18940002 growth_protocol_ch1.1
 GSE12006 GSM303527 GPL3154-PMID:18940002 growth_protocol_ch1.2
 GSE12006 GSM303528 GPL3154-PMID:18940002 growth_protocol_ch1.4
-GSE12006 GSM303528 GPL3154-PMID:18940002
+GSE12006 GSM303528 GPL3154-PMID:18940002 extract_protocol_ch1.3
 GSE12006 GSM303528 GPL3154-PMID:18940002 title.1
 GSE12006 GSM303528 GPL3154-PMID:18940002 source_name_ch1.1
 GSE12006 GSM303528 GPL3154-PMID:18940002 organism_ch1.1
@@ -9352,7 +9352,7 @@ GSE12006 GSM303528 GPL3154-PMID:18940002 characteristics_ch1.1
 GSE12006 GSM303528 GPL3154-PMID:18940002 growth_protocol_ch1.1
 GSE12006 GSM303528 GPL3154-PMID:18940002 growth_protocol_ch1.2
 GSE12006 GSM303529 GPL3154-PMID:18940002 growth_protocol_ch1.4
-GSE12006 GSM303529 GPL3154-PMID:18940002
+GSE12006 GSM303529 GPL3154-PMID:18940002 extract_protocol_ch1.3
 GSE12006 GSM303529 GPL3154-PMID:18940002 title.1
 GSE12006 GSM303529 GPL3154-PMID:18940002 source_name_ch1.1
 GSE12006 GSM303529 GPL3154-PMID:18940002 organism_ch1.1
......
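The rows patched above appear to be the GSE-GSM index that tagging.py loads through --infoPath/--infoFile. Each reconstructed sentence is paired with the index row at the same line position (idx[lidx], with lidx incremented per sentence), so a row missing its fourth column, as these were before the fix, would yield a ragged metadata prefix in Outputs I and II. A hedged sketch of that pairing, with file names taken from the run logs and the four-column layout assumed from the hunks above:

# Sketch of how the index is consumed (assumed layout; pairing by line position
# follows idx[lidx] / lidx += 1 in tagging.py).
index_rows = open("bg_sentences_midx_v4.txt").readlines()
sentences  = open("annot-input_bg_v4.txt").readlines()
assert len(index_rows) == len(sentences)

for lidx, sentence in enumerate(sentences):
    gse, gsm, gpl, field = index_rows[lidx].rstrip('\n').split('\t')
    prefix = '\t'.join((gse, gsm, gpl, field)) + '\t'   # prepended to every Output I/II line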
This diff could not be displayed because it is too large.
--------------------------------- PARAMETERS --------------------------------
-Path of CoreNLP output: /home/egaytan/automatic-extraction-growth-conditions/CoreNLP/output/annotation
-File with CoreNLP-tagging bg-sentences: bg_sentences_v3.txt.ner
-Path to save data set: /home/egaytan/automatic-extraction-growth-conditions/predict-annot/input
-File to save recontrsucted bg-sentences: annot-input_bg_v3.txt
--------------------------------- PROCESSING --------------------------------
-Number of sentences: 14716
-==================================END===================================

 -------------------------------- PARAMETERS --------------------------------
-Path of CoreNLP output: /home/egaytan/automatic-extraction-growth-conditions/CoreNLP/output/annotation
-File with CoreNLP-tagging bg-sentences: bg_sentences_v4.txt.ner
-Path to save data set: /home/egaytan/automatic-extraction-growth-conditions/predict-annot/input
-File to save recontrsucted bg-sentences: annot-input_bg_v4.txt
+--inputPath Path of training data set : /home/egaytan/automatic-extraction-growth-conditions/predict-annot/input/
+--outputPath Output path to place output files: /home/egaytan/automatic-extraction-growth-conditions/predict-annot/output/
+--outputFileI Output tagged file I : annot-input_bg_outputI_v4
+--outputFileII Output tagged file II : annot-input_bg_outputII_v4
+--outputFileII Output tagged file III : annot-input_bg_outputIII_v4
+--modelPath Path to read CRF model : /home/egaytan/automatic-extraction-growth-conditions/CRF/models
+--modelName Model name : model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10
+--infoPath Path of GSE-GSM index file : /home/egaytan/automatic-extraction-growth-conditions/predict-annot/mapping
+--infoFile GSE-GSM index file : bg_sentences_midx_v4.txt
+--variant Run variant : 13
+--S1 General features : True
+--S2 Inner/Complete word features : False
+--S3 Extended context features : False
+--S4 Semantic features : True
+--filteringStopWords Filtering stop words : False
+--filterSymbols Filtering punctuation marks : False
 -------------------------------- PROCESSING --------------------------------
-Number of sentences: 90904
-==================================END===================================
+Reading CRF model...
+Reading CRF model done in: 0.009463s
+Processing corpus...
+Preprocessing file...annot-input_bg_v4.txt
+Sentences input data: 90688
+Predicting tags with model...
+Prediction done in: 26.367272s
+Tagging file...
+Saving Ouput I...
+Saving Ouput II...
+Saving Ouput III...
+Processing corpus done in: 56.584394s
......
--------------------------------- PARAMETERS --------------------------------
---inputPath Path of training data set : /home/egaytan/automatic-extraction-growth-conditions/predict-annot/input/
---outputPath Output path to place output files: /home/egaytan/automatic-extraction-growth-conditions/predict-annot/output/
---outputFileI Output tagged file I : annot-input_bg_outputI.txt
---outputFileII Output tagged file II : annot-input_bg_outputII.txt
---modelPath Path to read CRF model : /home/egaytan/automatic-extraction-growth-conditions/CRF/models
---modelName Model name : model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10
---infoPath Path of GSE-GSM index file : /home/egaytan/automatic-extraction-growth-conditions/predict-annot/mapping
---infoFile GSE-GSM index file : bg_sentences_midx.txt
---variant Run variant : 13
---S1 General features : True
---S2 Inner/Complete word features : False
---S3 Extended context features : False
---S4 Semantic features : True
---filteringStopWords Filtering stop words : False
---filterSymbols Filtering punctuation marks : False
-Filtering symbols ['.', ',', ':', ';', '?', '!', "'", '"', '<', '>', '(', ')', '-', '_', '/', '\\', '¿', '¡', '+', '{', '}', '[', ']', '*', '%', '$', '#', '&', '°', '`', '...']: False
--------------------------------- PROCESSING --------------------------------
-Reading CRF model...
-Reading CRF model done in: 0.008336s
-Processing corpus...
-Preprocessing file...annot-input_bg_v3.txt
-Sentences input data: 14716
-Predicting tags with model
-Prediction done in: 1.688127s
-Tagging file
-Processing corpus done in: 3.948320s
@@ -17,10 +17,16 @@
 Filtering symbols ['.', ',', ':', ';', '?', '!', "'", '"', '<', '>', '(', ')', '-', '_', '/', '\\', '¿', '¡', '+', '{', '}', '[', ']', '*', '%', '$', '#', '&', '°', '`', '...']: False
 -------------------------------- PROCESSING --------------------------------
 Reading CRF model...
-Reading CRF model done in: 0.009804s
+Reading CRF model done in: 0.009363s
 Processing corpus...
 Preprocessing file...annot-input_bg_v3.txt
 Sentences input data: 14716
 Predicting tags with model
-Prediction done in: 1.811103s
+Prediction done in: 1.737334s
 Tagging file
+Preprocessing file...annot-input_bg_v4.txt
+Sentences input data: 90688
+Predicting tags with model
+Prediction done in: 26.434549s
+Tagging file
+Processing corpus done in: 58.304885s
......