Showing 6 changed files with 39 additions and 34 deletions
... | @@ -138,7 +138,8 @@ if __name__ == "__main__": | ... | @@ -138,7 +138,8 @@ if __name__ == "__main__": |
138 | sentencesOutputDataI = [] | 138 | sentencesOutputDataI = [] |
139 | # Preprocessing input sentences | 139 | # Preprocessing input sentences |
140 | with open(os.path.join(options.inputPath, file), "r") as iFile: | 140 | with open(os.path.join(options.inputPath, file), "r") as iFile: |
141 | - sentencesInputData = [ line.strip('\n').split() for line in iFile] | 141 | + lines = iFile.readlines() |
142 | + sentencesInputData = [ line.strip('\n').split() for line in lines] | ||
142 | # Save input sentences | 143 | # Save input sentences |
143 | X_input = [training.sent2features(s, options.S1, options.S2, options.S3, options.S4, options.variant) for s in sentencesInputData] | 144 | X_input = [training.sent2features(s, options.S1, options.S2, options.S3, options.S4, options.variant) for s in sentencesInputData] |
144 | print("Sentences input data: " + str(len(sentencesInputData))) | 145 | print("Sentences input data: " + str(len(sentencesInputData))) |
... | @@ -148,12 +149,13 @@ if __name__ == "__main__": | ... | @@ -148,12 +149,13 @@ if __name__ == "__main__": |
148 | print("Predicting tags with model...") | 149 | print("Predicting tags with model...") |
149 | y_pred = crf.predict(X_input) | 150 | y_pred = crf.predict(X_input) |
150 | 151 | ||
152 | + #print(y_pred) | ||
151 | print("Prediction done in: %fs" % (time() - t1)) | 153 | print("Prediction done in: %fs" % (time() - t1)) |
152 | 154 | ||
153 | ########################################### Tagging with CRF model ########################################### | 155 | ########################################### Tagging with CRF model ########################################### |
154 | print("Tagging file...") | 156 | print("Tagging file...") |
155 | lidx = 0 | 157 | lidx = 0 |
156 | - for line, tagLine in zip(iFile.readlines(), y_pred): | 158 | + for line, tagLine in zip(lines, y_pred): |
157 | # unique tags | 159 | # unique tags |
158 | Ltags = set(labels).intersection(set(tagLine)) | 160 | Ltags = set(labels).intersection(set(tagLine)) |
159 | # Skip untagged sentence | 161 | # Skip untagged sentence |
... | @@ -178,6 +180,7 @@ if __name__ == "__main__": | ... | @@ -178,6 +180,7 @@ if __name__ == "__main__": |
178 | else: | 180 | else: |
179 | outputLine = line.split(' ')[0] | 181 | outputLine = line.split(' ')[0] |
180 | # Saving Sentence Ouput I | 182 | # Saving Sentence Ouput I |
183 | + print(outputLine) | ||
181 | sentencesOutputDataI.append(idx[lidx].replace('\n','\t') + outputLine + '\t' + ', '.join(Ltags)) | 184 | sentencesOutputDataI.append(idx[lidx].replace('\n','\t') + outputLine + '\t' + ', '.join(Ltags)) |
182 | # Increase sentence counter | 185 | # Increase sentence counter |
183 | lidx += 1 | 186 | lidx += 1 |
... | @@ -212,41 +215,43 @@ if __name__ == "__main__": | ... | @@ -212,41 +215,43 @@ if __name__ == "__main__": |
212 | outputLine += word.split('|')[0] + ' ' | 215 | outputLine += word.split('|')[0] + ' ' |
213 | i += 1 | 216 | i += 1 |
214 | # Saving Sentence Ouput I | 217 | # Saving Sentence Ouput I |
218 | + print(outputLine) | ||
215 | sentencesOutputDataI.append(idx[lidx].replace('\n', '\t') + outputLine+ '\t' +', '.join(Ltags)) | 219 | sentencesOutputDataI.append(idx[lidx].replace('\n', '\t') + outputLine+ '\t' +', '.join(Ltags)) |
216 | lidx += 1 | 220 | lidx += 1 |
221 | + | ||
217 | print("\n".join(sentencesOutputDataI[1:3])) | 222 | print("\n".join(sentencesOutputDataI[1:3])) |
218 | - ########################################### Save Output I ########################################## | 223 | + ########################################### Save Output I ########################################## |
219 | - print("Saving Ouput I...") | 224 | + print("Saving Ouput I...") |
220 | - with open(os.path.join(options.outputPath, options.outFileI + '_' + options.modelName + '.tsv'), "w") as oFileI: | 225 | + with open(os.path.join(options.outputPath, options.outFileI + '_' + options.modelName + '.tsv'), "w") as oFileI: |
221 | - for line in sentencesOutputDataI: | 226 | + for line in sentencesOutputDataI: |
222 | - if re.findall('</', line): | 227 | + if re.findall('</', line): |
223 | - #print(line) | 228 | + #print(line) |
224 | - oline = line.replace('LDR','(') | ||
225 | - oline = oline.replace('RDR',')') | ||
226 | - oFileI.write(oline + '\n') | ||
227 | - | ||
228 | - ########################################### Save Output II ########################################## | ||
229 | - print("Saving Ouput II...") | ||
230 | - with open(os.path.join(options.outputPath, options.outFileII + '_' + options.modelName + '.tsv'), "w") as oFileII: | ||
231 | - for line in sentencesOutputDataI: | ||
232 | oline = line.replace('LDR','(') | 229 | oline = line.replace('LDR','(') |
233 | oline = oline.replace('RDR',')') | 230 | oline = oline.replace('RDR',')') |
234 | - for ttex, tag in re.findall(r'<[^>]+>([^<]+)</([^>]+)>', oline): | 231 | + oFileI.write(oline + '\n') |
235 | - lline = oline.split('\t')[0:-2] + [ttex, tag] | ||
236 | - nline = '\t'.join(lline) | ||
237 | - oFileII.write(nline + '\n') | ||
238 | - | ||
239 | - ########################################### Save Output III ########################################## | ||
240 | - print("Saving Ouput III...") | ||
241 | - with open(os.path.join(options.outputPath, options.outFileIII + '_' + options.modelName + '.tsv'), "w") as oFileIII: | ||
242 | - for line, tagLine in zip(iFile.readlines(), y_pred): | ||
243 | - oline = [ w.split('|')[0].replace('LDR','(').replace('LDR','(')+'|'+tag for w,tag in zip(line.split(' '), tagLine)] | ||
244 | 232 | ||
245 | - oFileIII.write(' '.join(oline) + '\n') | 233 | + ########################################### Save Output II ########################################## |
246 | - | 234 | + print("Saving Ouput II...") |
247 | - ########################################### Save Probs ########################################## | 235 | + with open(os.path.join(options.outputPath, options.outFileII + '_' + options.modelName + '.tsv'), "w") as oFileII: |
248 | - y_probs = crf.predict_marginals(X_input) | 236 | + for line in sentencesOutputDataI: |
249 | - # from https://stackoverflow.com/questions/7100125/storing-python-dictionaries | 237 | + oline = line.replace('LDR','(') |
250 | - with open(os.path.join(options.outputPath, 'crf_probs.json'), 'w') as fp: | 238 | + oline = oline.replace('RDR',')') |
251 | - json.dump(y_probs, fp) | 239 | + for ttex, tag in re.findall(r'<[^>]+>([^<]+)</([^>]+)>', oline): |
252 | - print("Processing corpus done in: %fs" % (time() - t0)) | 240 | + lline = oline.split('\t')[0:-2] + [ttex, tag] |
241 | + nline = '\t'.join(lline) | ||
242 | + oFileII.write(nline + '\n') | ||
243 | + | ||
244 | + ########################################### Save Output III ########################################## | ||
245 | + print("Saving Ouput III...") | ||
246 | + with open(os.path.join(options.outputPath, options.outFileIII + '_' + options.modelName + '.tsv'), "w") as oFileIII: | ||
247 | + for line, tagLine in zip(lines, y_pred): | ||
248 | + oline = [ w.split('|')[0].replace('LDR','(').replace('LDR','(')+'|'+tag for w,tag in zip(line.split(' '), tagLine)] | ||
249 | + | ||
250 | + oFileIII.write(' '.join(oline) + '\n') | ||
251 | + | ||
252 | + ########################################### Save Probs ########################################## | ||
253 | + y_probs = crf.predict_marginals(X_input) | ||
254 | + # from https://stackoverflow.com/questions/7100125/storing-python-dictionaries | ||
255 | + with open(os.path.join(options.outputPath, 'crf_probs.json'), 'w') as fp: | ||
256 | + json.dump(y_probs, fp) | ||
257 | + print("Pssing corpus done in: %fs" % (time() - t0)) | ... | ... |
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
File mode changed
This diff could not be displayed because it is too large.
-
Please register or login to post a comment