Showing 6 changed files with 9 additions and 4 deletions
@@ -138,7 +138,8 @@ if __name__ == "__main__":
 sentencesOutputDataI = []
 # Preprocessing input sentences
 with open(os.path.join(options.inputPath, file), "r") as iFile:
-sentencesInputData = [ line.strip('\n').split() for line in iFile]
+lines = iFile.readlines()
+sentencesInputData = [ line.strip('\n').split() for line in lines]
 # Save input sentences
 X_input = [training.sent2features(s, options.S1, options.S2, options.S3, options.S4, options.variant) for s in sentencesInputData]
 print("Sentences input data: " + str(len(sentencesInputData)))
@@ -148,12 +149,13 @@ if __name__ == "__main__":
 print("Predicting tags with model...")
 y_pred = crf.predict(X_input)
 
+#print(y_pred)
 print("Prediction done in: %fs" % (time() - t1))
 
 ########################################### Tagging with CRF model ###########################################
 print("Tagging file...")
 lidx = 0
-for line, tagLine in zip(iFile.readlines(), y_pred):
+for line, tagLine in zip(lines, y_pred):
 # unique tags
 Ltags = set(labels).intersection(set(tagLine))
 # Skip untagged sentence
@@ -178,6 +180,7 @@ if __name__ == "__main__":
 else:
 outputLine = line.split(' ')[0]
 # Saving Sentence Ouput I
+print(outputLine)
 sentencesOutputDataI.append(idx[lidx].replace('\n','\t') + outputLine + '\t' + ', '.join(Ltags))
 # Increase sentence counter
 lidx += 1
@@ -212,8 +215,10 @@ if __name__ == "__main__":
 outputLine += word.split('|')[0] + ' '
 i += 1
 # Saving Sentence Ouput I
+print(outputLine)
 sentencesOutputDataI.append(idx[lidx].replace('\n', '\t') + outputLine+ '\t' +', '.join(Ltags))
 lidx += 1
+
 print("\n".join(sentencesOutputDataI[1:3]))
 ########################################### Save Output I ##########################################
 print("Saving Ouput I...")
@@ -239,7 +244,7 @@ if __name__ == "__main__":
 ########################################### Save Output III ##########################################
 print("Saving Ouput III...")
 with open(os.path.join(options.outputPath, options.outFileIII + '_' + options.modelName + '.tsv'), "w") as oFileIII:
-for line, tagLine in zip(iFile.readlines(), y_pred):
+for line, tagLine in zip(lines, y_pred):
 oline = [ w.split('|')[0].replace('LDR','(').replace('LDR','(')+'|'+tag for w,tag in zip(line.split(' '), tagLine)]
 
 oFileIII.write(' '.join(oline) + '\n')
@@ -249,4 +254,4 @@ if __name__ == "__main__":
 # from https://stackoverflow.com/questions/7100125/storing-python-dictionaries
 with open(os.path.join(options.outputPath, 'crf_probs.json'), 'w') as fp:
 json.dump(y_probs, fp)
-print("Processing corpus done in: %fs" % (time() - t0))
+print("Pssing corpus done in: %fs" % (time() - t0))
Four additional diffs could not be displayed because they are too large.
One file mode changed.
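For context on the main change in the displayed diff: the original script called iFile.readlines() in loops that run after the file iterator had already been consumed (and, once the with block exits, closed), so those zip(...) passes had nothing to pair with y_pred. The commit reads the lines once into a list named lines and reuses that list everywhere. Below is a minimal standalone sketch of that pattern, not the repository's code; the temporary sample file and the stand-in predictions are hypothetical.

import os
import tempfile

# Build a tiny throwaway input file for the demonstration (hypothetical data).
path = os.path.join(tempfile.mkdtemp(), "sample.txt")
with open(path, "w") as f:
    f.write("the protein regulates the gene\nthe gene is transcribed\n")

# Broken pattern: a file object is a one-shot iterator, so a second
# readlines() on the same handle returns an empty list.
with open(path, "r") as iFile:
    first_pass = [line.strip('\n').split() for line in iFile]
    second_pass = iFile.readlines()            # already exhausted -> []
print(len(first_pass), len(second_pass))       # prints: 2 0

# Fixed pattern (what the commit does): read once, reuse the list.
with open(path, "r") as iFile:
    lines = iFile.readlines()
    sentences = [line.strip('\n').split() for line in lines]

y_pred = [["O"] * len(s) for s in sentences]   # stand-in for crf.predict(X_input)
for line, tagLine in zip(lines, y_pred):       # every sentence now gets its tags
    print(line.strip(), "->", tagLine)

Holding the whole file in memory is reasonable here, since the script already builds per-sentence feature lists for the entire input before predicting.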