Carlos-Francisco Méndez-Cruz

Extract manually tagged GCs

...@@ -79,6 +79,11 @@ if __name__ == "__main__": ...@@ -79,6 +79,11 @@ if __name__ == "__main__":
79 tag = m.group('tag') 79 tag = m.group('tag')
80 content = m.group('content') 80 content = m.group('content')
81 content = content.strip() 81 content = content.strip()
82 + content = content.replace("&amp;", "&")
83 + content = content.replace("&lt;", "<")
84 + content = content.replace("&gt;", ">")
85 + content = content.replace("&quot;", "\"")
86 + content = content.replace("&apos;", "\'")
82 print("\nSerie: {}\tSample: {}\tTag: {}\tContent: {}".format(serie, sample, tag, content.encode(encoding='utf-8', errors='replace'))) 87 print("\nSerie: {}\tSample: {}\tTag: {}\tContent: {}".format(serie, sample, tag, content.encode(encoding='utf-8', errors='replace')))
83 if tag in hashGcs[serie][sample]: 88 if tag in hashGcs[serie][sample]:
84 if content in hashGcs[serie][sample][tag]: 89 if content in hashGcs[serie][sample][tag]:
......