Showing 4 changed files with 808 additions and 0 deletions
| 1 | +import stanza | ||
| 2 | +import argparse | ||
| 3 | +import re | ||
| 4 | +import os | ||
| 5 | +import pandas as pd | ||
| 6 | + | ||
| 7 | +# Objective | ||
| 8 | +# Sentence extraction from XML Soft files. | ||
| 9 | +# | ||
| 10 | +# Input parameters | ||
| 11 | +# --inputPath=PATH Path to XML Soft files | ||
| 12 | +# --outputPath=PATH Path to place output files | ||
| 13 | +# | ||
| 14 | +# Output | ||
| 15 | +# Files with sentences obtained from XML Soft files | ||
| 16 | +# | ||
| 17 | +# Examples | ||
| 18 | +# python extract-sentences-from-softfiles.py | ||
| 19 | +# --inputPath /home/cmendezc/Documents/ccg/gitlab-automatic-extraction-growth-conditions/data-sets/data-curation/tagged-xml-data | ||
| 20 | +# --outputPath /home/cmendezc/Documents/ccg/gitlab-automatic-extraction-growth-conditions/data-sets/data-curation/curated-sentences | ||
| 21 | +# | ||
| 22 | +# python extract-sentences-from-softfiles.py --inputPath /home/cmendezc/Documents/ccg/gitlab-automatic-extraction-growth-conditions/data-sets/data-curation/tagged-xml-data --outputPath /home/cmendezc/Documents/ccg/gitlab-automatic-extraction-growth-conditions/data-sets/data-curation/curated-sentences | ||
| 23 | + | ||
| 24 | +########################################## | ||
| 25 | +# MAIN PROGRAM # | ||
| 26 | +########################################## | ||
| 27 | + | ||
| 28 | +if __name__ == "__main__": | ||
| 29 | + # Defining parameters | ||
| 30 | + parser = argparse.ArgumentParser( | ||
| 31 | + prog='extract-sentences-from-softfiles', | ||
| 32 | +        description='Sentence extraction from XML Soft files.', | ||
| 33 | + epilog='') | ||
| 34 | + parser.add_argument("--inputPath", dest="inputPath", | ||
| 35 | + help="Path to XML Soft files", metavar="PATH") | ||
| 36 | + parser.add_argument("--outputPath", dest="outputPath", | ||
| 37 | + help="Path for output files", metavar="PATH") | ||
| 38 | + | ||
| 39 | + args = parser.parse_args() | ||
| 40 | + | ||
| 41 | + print('-------------------------------- PARAMETERS --------------------------------') | ||
| 42 | + print("Path to XML Soft files: " + args.inputPath) | ||
| 43 | + print("Path to output files: " + args.outputPath) | ||
| 44 | + print('-------------------------------- PROCESSING --------------------------------') | ||
| 45 | + | ||
| 46 | + ## Tags of GCs into consideration | ||
| 47 | + # culture medium, medium supplements, aeration, temperature, | ||
| 48 | + # pH, agitation, growth phase, optical density, genetic background | ||
| 49 | +    tags = {  # NOTE: superseded by the tags list redefined below; kept here for reference | ||
| 50 | + '<Gtype>': 'Gtype', | ||
| 51 | + # '<Gversion>': 'Gversion', | ||
| 52 | + '<Med>': 'Med', | ||
| 53 | + '<Phase>': 'Phase', | ||
| 54 | + # '<Substrain>': 'Substrain', | ||
| 55 | + '<Supp>': 'Supp', | ||
| 56 | + # '<Strain>': 'Strain', | ||
| 57 | + # '<Technique>': 'Technique', | ||
| 58 | + '<Temp>': 'Temp', | ||
| 59 | + '<OD>': 'OD', | ||
| 60 | + '<Anti>': 'Anti', | ||
| 61 | + '<Agit>': 'Agit', | ||
| 62 | + '<Air>': 'Air', | ||
| 63 | + '<Vess>': 'Vess', | ||
| 64 | + '<pH>': 'pH' | ||
| 65 | + } | ||
| 66 | + #tags = ['<Gtype>', '<Med>', '<Phase>', '<Supp>', | ||
| 67 | + # '<Temp>', '<OD>', '<Anti>', '<Agit>', | ||
| 68 | + # '<Air>', '<Vess>', '<pH>'] | ||
| 69 | + #deleted_tags = ['<Gversion>', '<Substrain>', '<Strain>', '<Technique>'] | ||
| 70 | + tags = ['Gtype', 'Med', 'Phase', 'Supp', | ||
| 71 | + 'Temp', 'OD', 'Anti', 'Agit', | ||
| 72 | + 'Air', 'Vess', 'pH'] | ||
| 73 | + deleted_tags = ['Gversion', 'Substrain', 'Strain', 'Technique'] | ||
| 74 | + all_tags = tags + deleted_tags | ||
| 75 | + # Regex to check if line has a tag | ||
| 76 | + regex_has_tag = re.compile(r'<(' + '|'.join(all_tags) + r')>') | ||
| 77 | + # Regex to delete tags | ||
| 78 | + regex_delete_tag = re.compile(r'</?(' + '|'.join(deleted_tags) + r')>') | ||
| 79 | + # Regex to substitute tags | ||
| 80 | + regex_subs_ini_tag = re.compile(r'<(?P<tag>(' + '|'.join(tags) + r'))>') | ||
| 81 | + regex_subs_end_tag = re.compile(r'</(?P<tag>(' + '|'.join(tags) + r'))>') | ||
| 82 | + #p = re.compile(r'blue (?P<animal>dog|cat)') | ||
| 83 | + #p.sub(r'gray \g<animal>', s) | ||
| 84 | + # Regex to tag GCs | ||
| 85 | + regex_gc_ini_tag = re.compile(r'INI_(?P<tag>(' + '|'.join(tags) + r'))') | ||
| 86 | + regex_gc_end_tag = re.compile(r'END_(?P<tag>(' + '|'.join(tags) + r'))') | ||
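    # Illustrative example of the transformation below (hypothetical field text, not from a real SOFT file):
    #   field_text:      "growth medium: <Med>LB</Med>"
    #   after tag subs:  "growth medium:  INI_Med LB END_Med "
    #   transformed tokens (word|lemma|xpos|gc_tag), roughly:
    #     growth|growth|NN|O medium|medium|NN|O :|:|:|O LB|LB|NNP|Med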
| 87 | + | ||
| 88 | + # Testing file: GSE54899_family_retagged-05242019_validated.xml | ||
| 89 | + testing_file = "GSE54899_family_retagged-05242019_validated.xml" | ||
| 90 | + | ||
| 91 | + # Define stanza pipeline for sentence segmentation | ||
| 92 | + nlp_sentence_segmentation = stanza.Pipeline(lang='en', processors='tokenize') | ||
| 93 | + # Define stanza pipeline for lemmatization and pos tagging without sentence segmentation | ||
| 94 | + nlp = stanza.Pipeline(lang='en', processors='tokenize,mwt,pos,lemma', tokenize_no_ssplit=True) | ||
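    # Note: tokenize_no_ssplit=True disables stanza's sentence splitting, so each field passed to nlp()
    # is, in effect, analyzed as a single sentence. The nlp_sentence_segmentation pipeline defined above
    # is currently not used in this script.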
| 95 | + | ||
| 96 | + # Store field_name (bangline) and field_text | ||
| 97 | + field_name = "" | ||
| 98 | + field_text = "" | ||
| 99 | + | ||
| 100 | + # Store list of unique field_name | ||
| 101 | + hash_field_name = {} | ||
| 102 | + | ||
| 103 | + # Store sentences from fields that contained at least one GC tag. | ||
| 104 | +    # We keep this list so that someone can review it manually | ||
| 105 | + df_sentences_to_check = pd.DataFrame(columns=['serie', 'serie_pubmed_id', 'sample', 'field_name', 'original_sentence', 'modified_sentence', 'transformed_sentence']) | ||
| 106 | + | ||
| 107 | + # Store serie number | ||
| 108 | + # ^SERIES = GSE54899 | ||
| 109 | + serie = "" | ||
| 110 | + # Store series pubmed id | ||
| 111 | + # !Series_pubmed_id = 25222563 | ||
| 112 | + serie_pubmed_id = "" | ||
| 113 | + # Store sample | ||
| 114 | + # ^SAMPLE = GSM1326335 | ||
| 115 | + sample = "" | ||
| 116 | + | ||
| 117 | + for path, dirs, files in os.walk(args.inputPath): | ||
| 118 | + # For each file in dir | ||
| 119 | + for file in files: | ||
| 120 | + if file == testing_file: | ||
| 121 | + print(" Reading file..." + str(file)) | ||
| 122 | + with open(os.path.join(args.inputPath, file)) as iFile: | ||
| 123 | + for line in iFile: | ||
| 124 | + line = line.rstrip('\n') | ||
| 125 | + if line.find(" = ") == -1: | ||
| 126 | + continue | ||
| 127 | +                        list_line = line.split(" = ", 1)  # split only on the first " = " so the full field text is kept | ||
| 128 | + field_name = list_line[0] | ||
| 129 | + #print("field_name: {}".format(field_name)) | ||
| 130 | + field_text = list_line[1] | ||
| 131 | + #print("field_text: {}".format(field_text)) | ||
| 132 | + if field_name == "^SERIES": | ||
| 133 | + serie = field_text | ||
| 134 | + elif field_name == "!Series_pubmed_id": | ||
| 135 | + serie_pubmed_id = field_text | ||
| 136 | + elif field_name == "^SAMPLE": | ||
| 137 | + sample = field_text | ||
| 138 | + elif regex_has_tag.search(line): # Contains GC tag | ||
| 139 | + if field_name in hash_field_name: | ||
| 140 | + hash_field_name[field_name] += 1 | ||
| 141 | + else: | ||
| 142 | + hash_field_name[field_name] = 1 | ||
| 143 | + original_sentence = field_text | ||
| 144 | + # delete GC tags | ||
| 145 | +                            modified_sentence = regex_delete_tag.sub("", field_text) | ||
| 147 | + # substitute tags | ||
| 148 | + # p = re.compile(r'blue (?P<animal>dog|cat)') | ||
| 149 | + # p.sub(r'gray \g<animal>', s) | ||
| 150 | + modified_sentence = regex_subs_ini_tag.sub(r' INI_\g<tag> ', modified_sentence) | ||
| 151 | + modified_sentence = regex_subs_end_tag.sub(r' END_\g<tag> ', modified_sentence) | ||
| 152 | + doc = nlp(modified_sentence) | ||
| 153 | + for i, sentence in enumerate(doc.sentences): | ||
| 154 | + # print(sentence.text) | ||
| 155 | + list_transformed_sentence = [] | ||
| 156 | + # For GC tag | ||
| 157 | + gc_tag = "O" | ||
| 158 | + in_tag = False | ||
| 159 | + for word in sentence.words: | ||
| 160 | + result = regex_gc_ini_tag.match(word.text) | ||
| 161 | + if result: | ||
| 162 | + gc_tag = result.group("tag") | ||
| 163 | + in_tag = True | ||
| 164 | + continue | ||
| 165 | + else: | ||
| 166 | + result = regex_gc_end_tag.match(word.text) | ||
| 167 | + if result: | ||
| 168 | + gc_tag = "O" | ||
| 169 | + in_tag = False | ||
| 170 | + continue | ||
| 171 | + else: | ||
| 172 | + if not in_tag: | ||
| 173 | + gc_tag = "O" | ||
| 174 | + list_transformed_sentence.append("{}|{}|{}|{}".format(word.text, word.lemma, word.xpos, gc_tag)) | ||
| 175 | + transformed_sentence = " ".join(list_transformed_sentence) | ||
| 176 | + new_row = {'serie': serie, | ||
| 177 | + 'serie_pubmed_id': serie_pubmed_id, | ||
| 178 | + 'sample': sample, | ||
| 179 | + 'field_name': field_name, | ||
| 180 | + 'original_sentence': original_sentence, | ||
| 181 | + 'modified_sentence': sentence.text, | ||
| 182 | + 'transformed_sentence': transformed_sentence} | ||
| 183 | + df_sentences_to_check = df_sentences_to_check.append(new_row, ignore_index=True) | ||
| 184 | + df_sentences_to_check.to_csv(os.path.join(args.outputPath, 'geo_sentences_to_check.csv')) | ||
| 185 | + #print(token) | ||
| 186 | + quit() | ||
| 187 | + | ||
| 188 | +    ## End of tagging. NOTE: the code below is legacy from an earlier version of this script; it is never reached in normal runs because quit() above terminates the script, and it uses names (in_labels, random, args.inputFile, args.index, args.trainingFile, args.testFile) that are not defined in this file | ||
| 189 | + out_labels = { | ||
| 190 | + '</Gtype>': 'O', | ||
| 191 | + '</Gversion>': 'O', | ||
| 192 | + '</Med>': 'O', | ||
| 193 | + '</Phase>': 'O', | ||
| 194 | + '</Substrain>': 'O', | ||
| 195 | + '</Supp>': 'O', | ||
| 196 | + '</Strain>': 'O', | ||
| 197 | + '</Technique>': 'O', | ||
| 198 | + '</Temp>': 'O', | ||
| 199 | + '</OD>': 'O', | ||
| 200 | + '</Anti>': 'O', | ||
| 201 | + '</Agit>': 'O', | ||
| 202 | + '</Air>': 'O', | ||
| 203 | + '</Vess>': 'O', | ||
| 204 | + '</pH>': 'O'} | ||
| 205 | + old_labels = { | ||
| 206 | + '<Orgn>': 'O', | ||
| 207 | + '</Orgn>': 'O' | ||
| 208 | + } | ||
| 209 | + | ||
| 210 | + # Other label | ||
| 211 | + flag = 'O' | ||
| 212 | + lista = [] | ||
| 213 | + # First sentence | ||
| 214 | + sentence = '' | ||
| 215 | + n = 0 | ||
| 216 | + with open(os.path.join(args.inputPath, args.inputFile), "r") as input_file: | ||
| 217 | + for line in input_file: | ||
| 218 | + if len(line.split('\t')) > 1: | ||
| 219 | + w = line.split('\t')[1] | ||
| 220 | + if w in in_labels or w in out_labels: | ||
| 221 | + # Tagging | ||
| 222 | + if w in in_labels.keys(): flag = in_labels[w] | ||
| 223 | + if w in out_labels: flag = out_labels[w] | ||
| 224 | + else: | ||
| 225 | + if w == "PGCGROWTHCONDITIONS": | ||
| 226 | + n = n + 1 | ||
| 227 | + words = sentence.split(' ') | ||
| 228 | + # End of sentence | ||
| 229 | + tags = [tag for tag in words if tag.split('|')[-1] in in_labels.values()] | ||
| 230 | +                        # At least one true tag in the sentence | ||
| 231 | + if len(tags) > 0: | ||
| 232 | + lista.append(sentence) | ||
| 233 | +                        # New sentence | ||
| 234 | + sentence = '' | ||
| 235 | + elif w not in old_labels.keys(): | ||
| 236 | +                    # Build and save the tagged sentence | ||
| 237 | + sentence = sentence + ' ' + ('|'.join(line.split('\t')[1:args.index]) + '|' + flag + ' ') | ||
| 238 | + | ||
| 239 | + print("Number of sentences with at least one tag: " + str(len(lista))) | ||
| 240 | + print("Number of sentences from CoreNLP: " + str(n)) | ||
| 241 | + | ||
| 242 | + # Split 70 30 training and test sentences | ||
| 243 | + trainingIndex = random.sample(range(len(lista)), int(len(lista) * .70)) | ||
| 244 | + testIndex = [n for n in range(len(lista)) if n not in trainingIndex] | ||
| 245 | + print("Number of sentences for training: " + str(len(trainingIndex))) | ||
| 246 | + print("Number of sentences for test: " + str(len(testIndex))) | ||
| 247 | + | ||
| 248 | + with open(os.path.join(args.outputPath, args.trainingFile), "w") as oFile: | ||
| 249 | + Data = [lista[i] for i in trainingIndex] | ||
| 250 | + oFile.write('\n'.join(Data)) | ||
| 251 | + | ||
| 252 | + with open(os.path.join(args.outputPath, args.testFile), "w") as oFile: | ||
| 253 | + Data = [lista[i] for i in testIndex] | ||
| 254 | + oFile.write('\n'.join(Data)) | ||
| 255 | + | ||
| 256 | + print("==================================END===================================") |
extraction-literature/input/README.md
0 → 100644
| 1 | +# Input article collection | ||
| 2 | +We used a list of PMIDs from article collections delivered by the curators (Víctor, Soco, Paloma). | ||
| 3 | +DRIVE (https://docs.google.com/spreadsheets/d/1OayfQ7ODgnU4d5PQ3SUAmFX3Tc27PocCHZ6flPXwLKc/edit?usp=sharing) | ||
| 4 | +Asana (https://app.asana.com/0/1200927210854847/1203428992254399/f) | ||
| 5 | + | ||
| 6 | +# Download PDFs | ||
| 7 | +We used the [Pubmed-Batch-Download](https://github.com/billgreenwald/Pubmed-Batch-Download) tool to download the PDF files. | ||
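The tool receives the PMIDs to download through its `-pmf` option (used in the runs below); the list we pass, list_of_PMIDs.txt (included at the end of this page), is a plain-text file with one PMID per line, for example:
```
21097887
23818864
24947454
```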
| 8 | +## Installation | ||
| 9 | +```shell | ||
| 10 | +(base) cmendezc@cmendezc-Latitude-7400:~/Documents/ccg$ git clone https://github.com/billgreenwald/Pubmed-Batch-Download.git | ||
| 11 | +Cloning into 'Pubmed-Batch-Download'... | ||
| 12 | +remote: Enumerating objects: 202, done. | ||
| 13 | +remote: Counting objects: 100% (12/12), done. | ||
| 14 | +remote: Compressing objects: 100% (12/12), done. | ||
| 15 | +remote: Total 202 (delta 5), reused 0 (delta 0), pack-reused 190 | ||
| 16 | +Receiving objects: 100% (202/202), 31.23 MiB | 1.09 MiB/s, done. | ||
| 17 | +Resolving deltas: 100% (102/102), done. | ||
| 18 | +(base) cmendezc@cmendezc-Latitude-7400:~/Documents/ccg$ mv Pubmed-Batch-Download/ github-Pubmed-Batch-Download | ||
| 19 | +(base) cmendezc@cmendezc-Latitude-7400:~/Documents/ccg$ cd github-Pubmed-Batch-Download/ | ||
| 20 | +(base) cmendezc@cmendezc-Latitude-7400:~/Documents/ccg/github-Pubmed-Batch-Download$ ls -l | ||
| 21 | +total 52 | ||
| 22 | +-rw-rw-r-- 1 cmendezc cmendezc 72 ene 5 11:31 example_pmf.tsv | ||
| 23 | +-rw-rw-r-- 1 cmendezc cmendezc 11430 ene 5 11:31 fetch_pdfs.py | ||
| 24 | +-rw-rw-r-- 1 cmendezc cmendezc 18711 ene 5 11:31 fetch_pdfs_toScript.ipynb | ||
| 25 | +-rw-rw-r-- 1 cmendezc cmendezc 551 ene 5 11:31 pubmed-batch-downloader-py3-windows.yml | ||
| 26 | +-rw-rw-r-- 1 cmendezc cmendezc 895 ene 5 11:31 pubmed-batch-downloader-py3.yml | ||
| 27 | +-rw-rw-r-- 1 cmendezc cmendezc 3667 ene 5 11:31 README.md | ||
| 28 | +drwxrwxr-x 2 cmendezc cmendezc 4096 ene 5 11:31 ruby_version | ||
| 29 | +-rw-rw-r-- 1 cmendezc cmendezc 0 ene 5 11:31 unfetched_pmids.tsv | ||
| 30 | +(base) cmendezc@cmendezc-Latitude-7400:~/Documents/ccg/github-Pubmed-Batch-Download$ conda env create -f pubmed-batch-downloader-py3.yml | ||
| 31 | +``` | ||
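Note: both errors in the testing runs below were caused by missing Python packages (`bs4` and `lxml`); they can also be installed right after creating the environment, which avoids the fixes shown later:
```shell
conda activate pubmed-batch-downloader-py3
pip install bs4 lxml
```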
| 32 | +## Testing | ||
| 33 | +Error! | ||
| 34 | +```shell | ||
| 35 | +(base) cmendezc@cmendezc-Latitude-7400:~/Documents/ccg/github-Pubmed-Batch-Download$ conda activate pubmed-batch-downloader-py3 | ||
| 36 | +(pubmed-batch-downloader-py3) cmendezc@cmendezc-Latitude-7400:~/Documents/ccg/github-Pubmed-Batch-Download$ python fetch_pdfs.py -pmf example_pmf.tsv | ||
| 37 | +Traceback (most recent call last): | ||
| 38 | + File "fetch_pdfs.py", line 64, in <module> | ||
| 39 | + from bs4 import BeautifulSoup | ||
| 40 | +ModuleNotFoundError: No module named 'bs4' | ||
| 41 | +``` | ||
| 42 | +Fix 1: Install bs4 | ||
| 43 | +```shell | ||
| 44 | +(pubmed-batch-downloader-py3) cmendezc@cmendezc-Latitude-7400:~/Documents/ccg/github-Pubmed-Batch-Download$ pip install bs4 | ||
| 45 | +Collecting bs4 | ||
| 46 | + Using cached https://files.pythonhosted.org/packages/10/ed/7e8b97591f6f456174139ec089c769f89a94a1a4025fe967691de971f314/bs4-0.0.1.tar.gz | ||
| 47 | +Collecting beautifulsoup4 (from bs4) | ||
| 48 | + Using cached https://files.pythonhosted.org/packages/9c/d8/909c4089dbe4ade9f9705f143c9f13f065049a9d5e7d34c828aefdd0a97c/beautifulsoup4-4.11.1-py3-none-any.whl | ||
| 49 | +Collecting soupsieve>1.2 (from beautifulsoup4->bs4) | ||
| 50 | + Using cached https://files.pythonhosted.org/packages/16/e3/4ad79882b92617e3a4a0df1960d6bce08edfb637737ac5c3f3ba29022e25/soupsieve-2.3.2.post1-py3-none-any.whl | ||
| 51 | +Building wheels for collected packages: bs4 | ||
| 52 | + Building wheel for bs4 (setup.py) ... done | ||
| 53 | + Stored in directory: /home/cmendezc/.cache/pip/wheels/a0/b0/b2/4f80b9456b87abedbc0bf2d52235414c3467d8889be38dd472 | ||
| 54 | +Successfully built bs4 | ||
| 55 | +Installing collected packages: soupsieve, beautifulsoup4, bs4 | ||
| 56 | +Successfully installed beautifulsoup4-4.11.1 bs4-0.0.1 soupsieve-2.3.2.post1 | ||
| 57 | +``` | ||
| 58 | +Error! | ||
| 59 | +```shell | ||
| 60 | +(pubmed-batch-downloader-py3) cmendezc@cmendezc-Latitude-7400:~/Documents/ccg/github-Pubmed-Batch-Download$ python fetch_pdfs.py -pmf example_pmf.tsv | ||
| 61 | +Output directory of fetched_pdfs did not exist. Created the directory. | ||
| 62 | +Trying to fetch pmid 27547345 | ||
| 63 | +** fetching of reprint 27547345 failed from error Couldn't find a tree builder with the features you requested: lxml. Do you need to install a parser library? | ||
| 64 | +Trying to fetch pmid 22610656 | ||
| 65 | +** fetching of reprint 22610656 failed from error Couldn't find a tree builder with the features you requested: lxml. Do you need to install a parser library? | ||
| 66 | +Trying to fetch pmid 23858657 | ||
| 67 | +** fetching of reprint 23858657 failed from error Couldn't find a tree builder with the features you requested: lxml. Do you need to install a parser library? | ||
| 68 | +Trying to fetch pmid 24998529 | ||
| 69 | +** fetching of reprint 24998529 failed from error Couldn't find a tree builder with the features you requested: lxml. Do you need to install a parser library? | ||
| 70 | +Trying to fetch pmid 27859194 | ||
| 71 | +** fetching of reprint 27859194 failed from error Couldn't find a tree builder with the features you requested: lxml. Do you need to install a parser library? | ||
| 72 | +Trying to fetch pmid 26991916 | ||
| 73 | +** fetching of reprint 26991916 failed from error Couldn't find a tree builder with the features you requested: lxml. Do you need to install a parser library? | ||
| 74 | +Trying to fetch pmid 26742956 | ||
| 75 | +** fetching of reprint 26742956 failed from error Couldn't find a tree builder with the features you requested: lxml. Do you need to install a parser library? | ||
| 76 | +Trying to fetch pmid 28388874 | ||
| 77 | +** fetching of reprint 28388874 failed from error Couldn't find a tree builder with the features you requested: lxml. Do you need to install a parser library? | ||
| 78 | +``` | ||
| 79 | +Fix 2: Install lxml | ||
| 80 | +```shell | ||
| 81 | +(pubmed-batch-downloader-py3) cmendezc@cmendezc-Latitude-7400:~/Documents/ccg/github-Pubmed-Batch-Download$ pip install lxml | ||
| 82 | +Collecting lxml | ||
| 83 | + Downloading https://files.pythonhosted.org/packages/4b/24/300d0fd5130cf55e5bbab2c53d339728370cb4ac12ca80a4f421c2e228eb/lxml-4.9.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl (5.8MB) | ||
| 84 | + |████████████████████████████████| 5.8MB 2.7MB/s | ||
| 85 | +Installing collected packages: lxml | ||
| 86 | +Successfully installed lxml-4.9.2 | ||
| 87 | +``` | ||
| 88 | +It runs, but it did not fetch all of the files. See unfetched_pmids.tsv | ||
| 89 | + | ||
| 90 | +## Run | ||
| 91 | +```shell | ||
| 92 | +(base) cmendezc@cmendezc-Latitude-7400:~/Documents/ccg/github-Pubmed-Batch-Download$ conda activate pubmed-batch-downloader-py3 | ||
| 93 | +(pubmed-batch-downloader-py3) cmendezc@cmendezc-Latitude-7400:~/Documents/ccg/github-Pubmed-Batch-Download$ cd /home/cmendezc/Documents/ccg/gitlab-automatic-extraction-growth-conditions/extraction-literature/input/pdfs | ||
| 94 | +(pubmed-batch-downloader-py3) cmendezc@cmendezc-Latitude-7400:~/Documents/ccg/gitlab-automatic-extraction-growth-conditions/extraction-literature/input/pdfs$ python /home/cmendezc/Documents/ccg/github-Pubmed-Batch-Download/fetch_pdfs.py -pmf ../list_of_PMIDs.txt | ||
| 95 | +Output directory of fetched_pdfs did not exist. Created the directory. | ||
| 96 | +Trying to fetch pmid 21097887 | ||
| 97 | +Trying genericCitationLabelled | ||
| 98 | +Trying pubmed_central_v2 | ||
| 99 | +** fetching reprint using the 'pubmed central' finder... | ||
| 100 | +** fetching of reprint 21097887 succeeded | ||
| 101 | +Trying to fetch pmid 23818864 | ||
| 102 | +Trying genericCitationLabelled | ||
| 103 | +Trying pubmed_central_v2 | ||
| 104 | +Trying acsPublications | ||
| 105 | +Trying uchicagoPress | ||
| 106 | +Trying nejm | ||
| 107 | +Trying futureMedicine | ||
| 108 | +Trying science_direct | ||
| 109 | +** fetching of reprint 23818864 failed from error list index out of range | ||
| 110 | +Trying to fetch pmid 24947454 | ||
| 111 | +Trying genericCitationLabelled | ||
| 112 | +** fetching reprint using the 'generic citation labelled' finder... | ||
| 113 | +** fetching of reprint 24947454 succeeded | ||
| 114 | +Trying to fetch pmid 25222563 | ||
| 115 | +Trying genericCitationLabelled | ||
| 116 | +** fetching reprint using the 'generic citation labelled' finder... | ||
| 117 | +** fetching of reprint 25222563 succeeded | ||
| 118 | +Trying to fetch pmid 25275371 | ||
| 119 | +Trying genericCitationLabelled | ||
| 120 | +** fetching reprint using the 'generic citation labelled' finder... | ||
| 121 | +** fetching of reprint 25275371 succeeded | ||
| 122 | +Trying to fetch pmid 25735747 | ||
| 123 | +Trying genericCitationLabelled | ||
| 124 | +Trying pubmed_central_v2 | ||
| 125 | +** fetching reprint using the 'pubmed central' finder... | ||
| 126 | +** fetching of reprint 25735747 succeeded | ||
| 127 | +Trying to fetch pmid 26258987 | ||
| 128 | +Trying genericCitationLabelled | ||
| 129 | +** fetching reprint using the 'generic citation labelled' finder... | ||
| 130 | +** fetching of reprint 26258987 succeeded | ||
| 131 | +Trying to fetch pmid 26279566 | ||
| 132 | +Trying genericCitationLabelled | ||
| 133 | +Trying pubmed_central_v2 | ||
| 134 | +Trying acsPublications | ||
| 135 | +Trying uchicagoPress | ||
| 136 | +Trying nejm | ||
| 137 | +Trying futureMedicine | ||
| 138 | +Trying science_direct | ||
| 139 | +Trying direct_pdf_link | ||
| 140 | +** Reprint 26279566 could not be fetched with the current finders. | ||
| 141 | +Trying to fetch pmid 26670385 | ||
| 142 | +Trying genericCitationLabelled | ||
| 143 | +Trying pubmed_central_v2 | ||
| 144 | +Trying acsPublications | ||
| 145 | +Trying uchicagoPress | ||
| 146 | +Trying nejm | ||
| 147 | +Trying futureMedicine | ||
| 148 | +Trying science_direct | ||
| 149 | +** fetching of reprint 26670385 failed from error Invalid URL 'f680gMuZlnwT4304lwsG531xpi4vbk83nDntGZ4l27M-1672941516-0-AdC4bMbcOc9cCSwDp4lsqirHOW3zv1msNUw8lijeZIxdN3BDTBUy983qf_LAfBVEhkt4k2Xwu_NYMJeaq3oG4LTyfDBxN2Ra-cmmTDVIK66GtTB9oyyn4GqMem1PTBwVEFtzEYcB4AOoR8EGbwWrEZa1jPMupBq_gJ0JlxuIGbBJw3SuuioKmRlQT_TDXNREjT2Av3DHrrz6C008shr-pgrTtAoM5aZ0N4clcoBQ1FWX04MZm-nPOxI-2zbxcHUYXqV91lbH7iWkztZWPcv6-Q3ePiFD6_-C7pdY_Mf0Y670kOKyhoqlZ0m3PqPm64-37r-nzxrcd2Z0MWJUMC8Jx1b1OA1e53TJy62F2K5ws3U82zktr4gEDS11A13r8DIn1wRCEH2dk8jI02NQoIp3JBTvUixhiNkWib01Zl7l7iAFLOJtWlVbeUsOwCh6imfV5m-2No7-SiGaur5Ip6Zf3ACDki_CjXifHxtGVh1TbvnYsBeUdoaWV3TsXdGvF7AVr_ytXg4-JiIHhaZ-SdCzpe65bWZmvwrIpCfZOEBOC-gNTm3tq5h1_2iQzVTinGQonsXdwLCYSKQeZRQ-qEFf7y4PpesHamAXmw1OZlZJtKlFgXx9MoCBp0Irx8ChWyIo5RhSBoa9j1_JW8AX1x7KDY3UX32ItW7-a2Qw5IEL_FRS6cyXOg1FLeHlanntIl11kKWmXyJ86bsvEQBn2Q9-1kMvisDZaM0LrfNT9KcghdkLgdpzsDEf-B4_MKdkVAhkBQ': No schema supplied. Perhaps you meant http://f680gMuZlnwT4304lwsG531xpi4vbk83nDntGZ4l27M-1672941516-0-AdC4bMbcOc9cCSwDp4lsqirHOW3zv1msNUw8lijeZIxdN3BDTBUy983qf_LAfBVEhkt4k2Xwu_NYMJeaq3oG4LTyfDBxN2Ra-cmmTDVIK66GtTB9oyyn4GqMem1PTBwVEFtzEYcB4AOoR8EGbwWrEZa1jPMupBq_gJ0JlxuIGbBJw3SuuioKmRlQT_TDXNREjT2Av3DHrrz6C008shr-pgrTtAoM5aZ0N4clcoBQ1FWX04MZm-nPOxI-2zbxcHUYXqV91lbH7iWkztZWPcv6-Q3ePiFD6_-C7pdY_Mf0Y670kOKyhoqlZ0m3PqPm64-37r-nzxrcd2Z0MWJUMC8Jx1b1OA1e53TJy62F2K5ws3U82zktr4gEDS11A13r8DIn1wRCEH2dk8jI02NQoIp3JBTvUixhiNkWib01Zl7l7iAFLOJtWlVbeUsOwCh6imfV5m-2No7-SiGaur5Ip6Zf3ACDki_CjXifHxtGVh1TbvnYsBeUdoaWV3TsXdGvF7AVr_ytXg4-JiIHhaZ-SdCzpe65bWZmvwrIpCfZOEBOC-gNTm3tq5h1_2iQzVTinGQonsXdwLCYSKQeZRQ-qEFf7y4PpesHamAXmw1OZlZJtKlFgXx9MoCBp0Irx8ChWyIo5RhSBoa9j1_JW8AX1x7KDY3UX32ItW7-a2Qw5IEL_FRS6cyXOg1FLeHlanntIl11kKWmXyJ86bsvEQBn2Q9-1kMvisDZaM0LrfNT9KcghdkLgdpzsDEf-B4_MKdkVAhkBQ? | ||
| 150 | +Trying to fetch pmid 26673755 | ||
| 151 | +Trying genericCitationLabelled | ||
| 152 | +** fetching reprint using the 'generic citation labelled' finder... | ||
| 153 | +** fetching of reprint 26673755 succeeded | ||
| 154 | +Trying to fetch pmid 28061857 | ||
| 155 | +Trying genericCitationLabelled | ||
| 156 | +** fetching reprint using the 'generic citation labelled' finder... | ||
| 157 | +** fetching of reprint 28061857 succeeded | ||
| 158 | +Trying to fetch pmid 28526842 | ||
| 159 | +Trying genericCitationLabelled | ||
| 160 | +** fetching reprint using the 'generic citation labelled' finder... | ||
| 161 | +** fetching of reprint 28526842 succeeded | ||
| 162 | +Trying to fetch pmid 29394395 | ||
| 163 | +Trying genericCitationLabelled | ||
| 164 | +Trying pubmed_central_v2 | ||
| 165 | +** fetching reprint using the 'pubmed central' finder... | ||
| 166 | +** fetching of reprint 29394395 succeeded | ||
| 167 | +Trying to fetch pmid 30137486 | ||
| 168 | +Trying genericCitationLabelled | ||
| 169 | +Trying pubmed_central_v2 | ||
| 170 | +** fetching reprint using the 'pubmed central' finder... | ||
| 171 | +** fetching of reprint 30137486 succeeded | ||
| 172 | +Trying to fetch pmid 30389436 | ||
| 173 | +Trying genericCitationLabelled | ||
| 174 | +Trying pubmed_central_v2 | ||
| 175 | +Trying acsPublications | ||
| 176 | +Trying uchicagoPress | ||
| 177 | +Trying nejm | ||
| 178 | +Trying futureMedicine | ||
| 179 | +Trying science_direct | ||
| 180 | +Trying direct_pdf_link | ||
| 181 | +** Reprint 30389436 could not be fetched with the current finders. | ||
| 182 | +Trying to fetch pmid 30420454 | ||
| 183 | +Trying genericCitationLabelled | ||
| 184 | +Trying pubmed_central_v2 | ||
| 185 | +Trying acsPublications | ||
| 186 | +Trying uchicagoPress | ||
| 187 | +Trying nejm | ||
| 188 | +Trying futureMedicine | ||
| 189 | +Trying science_direct | ||
| 190 | +** fetching of reprint 30420454 failed from error Invalid URL 'Ldk64CcSIXJkCs8pPt2YjpLkZJhaKs_m0pmekzL5SOY-1672941544-0-AaBQfP_66yID5nRb-xUznbW2FVXAWeiOcKnTosB_FUSxeeNSucukyHbo7OxAcJttrfA4pKDGasC8MvQz6o0cFBZv0VU_RfYXUn7Z6iVg5eVp7n_O4P9Zzk0IiE132EMNR-Xn0_gEfYM8DMCX5lS4yEgrs9hwhdJIWzS6N3fsDsa3kDjIH9oELTaDEbTbFvUXEkx3212-4NJ6SwCvfUzhtolsD7xJoswFQHjNBFrmUgScEORQpIWTWxzHPvpGTxepQMUPuAEbgNNykNbdp9oyLMDwmUnIqU7hSmeCkYU1RWlbxh95rcgH-yvV9mm3RQnIXT3WfcUE9lM5crnbBcplVCA4jbLP7kk1tu_BFbh-6gstCr0B24gEE5zJ41WGxwTbABhAmK7aAeHbH7V55EBpLOQcpkYhWZNiMMbVsG314TM_tE9UGM8B99FmrWUqCqwMcsGwDDWK7B-uHcDD5nJxQhgV5SlMnS0IVE18Bdu4zqIzT3ZS2sgGf9Drti4P5Qkso3v1pW_fBzq-Mrd6_O7cvwF7FlRc95tOXSjjS0Woc70HGNBNd1kc0ZR9NuwV9TnvPRWbuoYu3HXz65DeWmbGaLOFdHAOUARr1fD9DL9LRDmeAHOGkYkplz9pSbWXR6vYkIInqFnvQKuwhnVOltaWa6_VG3BH0oc9T4xAZdH83DsG6eHtJlitVhH8Sx_PBfukG4x0S1qsmIWUPDwZhwUe55ly0I5ISELLL8Z3tAJpq3zrdyV6CbwOjF7-nPF7aRNuxg': No schema supplied. Perhaps you meant http://Ldk64CcSIXJkCs8pPt2YjpLkZJhaKs_m0pmekzL5SOY-1672941544-0-AaBQfP_66yID5nRb-xUznbW2FVXAWeiOcKnTosB_FUSxeeNSucukyHbo7OxAcJttrfA4pKDGasC8MvQz6o0cFBZv0VU_RfYXUn7Z6iVg5eVp7n_O4P9Zzk0IiE132EMNR-Xn0_gEfYM8DMCX5lS4yEgrs9hwhdJIWzS6N3fsDsa3kDjIH9oELTaDEbTbFvUXEkx3212-4NJ6SwCvfUzhtolsD7xJoswFQHjNBFrmUgScEORQpIWTWxzHPvpGTxepQMUPuAEbgNNykNbdp9oyLMDwmUnIqU7hSmeCkYU1RWlbxh95rcgH-yvV9mm3RQnIXT3WfcUE9lM5crnbBcplVCA4jbLP7kk1tu_BFbh-6gstCr0B24gEE5zJ41WGxwTbABhAmK7aAeHbH7V55EBpLOQcpkYhWZNiMMbVsG314TM_tE9UGM8B99FmrWUqCqwMcsGwDDWK7B-uHcDD5nJxQhgV5SlMnS0IVE18Bdu4zqIzT3ZS2sgGf9Drti4P5Qkso3v1pW_fBzq-Mrd6_O7cvwF7FlRc95tOXSjjS0Woc70HGNBNd1kc0ZR9NuwV9TnvPRWbuoYu3HXz65DeWmbGaLOFdHAOUARr1fD9DL9LRDmeAHOGkYkplz9pSbWXR6vYkIInqFnvQKuwhnVOltaWa6_VG3BH0oc9T4xAZdH83DsG6eHtJlitVhH8Sx_PBfukG4x0S1qsmIWUPDwZhwUe55ly0I5ISELLL8Z3tAJpq3zrdyV6CbwOjF7-nPF7aRNuxg? | ||
| 191 | +Trying to fetch pmid 33172971 | ||
| 192 | +Trying genericCitationLabelled | ||
| 193 | +Trying pubmed_central_v2 | ||
| 194 | +Trying acsPublications | ||
| 195 | +Trying uchicagoPress | ||
| 196 | +Trying nejm | ||
| 197 | +Trying futureMedicine | ||
| 198 | +Trying science_direct | ||
| 199 | +** fetching of reprint 33172971 failed from error Invalid URL 'cIp5BepthG7srw3ecaG_06Qhx8PrpoP6WpFwlAopfrE-1672941545-0-AXmWkR9H8dN8IxbsllbsHZ1SigvIVLyZ0euRPz15XW6nX_MsA3Y9dPoL0MovKxj_yUNiDnSYrVSmYNzVo-LEANJZq45ZpzDVv1GFU1qNu0PpI-0YxuWHz4dSudrD_soFz3LsTCtgLamU66ZDSsrVNGTaqqbajetYgnnhu4K-BeYnLmpOxzcMzYU9mgynHjFv0NnrdUU75kJPeOIRpgrUlqm8JRnkMq0SEvI2IPTDW0ToohbWs4bLvLX0GNKGVT_v5to_am4hEVPC9jmkfkkNOLoMmfbnZC-L2EGAKufwZgz17d89HWfaK61no8EW8y5ysZ5A9yTRfN__C_LpTG6FWw2HWyR9FgIvz799f4ysIoz52azp4a7w3G3AHCWdUBUDy6gabo_psIE4mu3dCHLcDzGNO148UT5wzxTfrQV3aatPAWjnaK6-Re0XOkABNINniMLfF6Ti-0WgY-cHyLH2RgKISy_89MeNrVJy22GToy2c_LQwZN3RT3M8M2TFXLXmi9xEE4Z_4kSRA_aRnvRjKJdMJfxhc-BYW1G-dn2SDAetNZZL7HcJW6cGlAIjNWQqTD9ieGfLxGJe0OCLysFkeY3XwRY5vTHQ-xVI-gGKBY0A9gS70DH5t_pS53fBTQZ1pK667ct-BCo5aysuQHLcXrlE9coo8k8vtQKrmQ5-Fxp2ZNV_MKLY5yqBj5yAWJI5b_O-Mp5TyE9Zzyte_cTXqYtO14DAr1ev8TwqZP3YNbunHBcvIO20uVjbNxc4m--ARi5MMuxcDg4Kvju8Dbf3YKM': No schema supplied. Perhaps you meant http://cIp5BepthG7srw3ecaG_06Qhx8PrpoP6WpFwlAopfrE-1672941545-0-AXmWkR9H8dN8IxbsllbsHZ1SigvIVLyZ0euRPz15XW6nX_MsA3Y9dPoL0MovKxj_yUNiDnSYrVSmYNzVo-LEANJZq45ZpzDVv1GFU1qNu0PpI-0YxuWHz4dSudrD_soFz3LsTCtgLamU66ZDSsrVNGTaqqbajetYgnnhu4K-BeYnLmpOxzcMzYU9mgynHjFv0NnrdUU75kJPeOIRpgrUlqm8JRnkMq0SEvI2IPTDW0ToohbWs4bLvLX0GNKGVT_v5to_am4hEVPC9jmkfkkNOLoMmfbnZC-L2EGAKufwZgz17d89HWfaK61no8EW8y5ysZ5A9yTRfN__C_LpTG6FWw2HWyR9FgIvz799f4ysIoz52azp4a7w3G3AHCWdUBUDy6gabo_psIE4mu3dCHLcDzGNO148UT5wzxTfrQV3aatPAWjnaK6-Re0XOkABNINniMLfF6Ti-0WgY-cHyLH2RgKISy_89MeNrVJy22GToy2c_LQwZN3RT3M8M2TFXLXmi9xEE4Z_4kSRA_aRnvRjKJdMJfxhc-BYW1G-dn2SDAetNZZL7HcJW6cGlAIjNWQqTD9ieGfLxGJe0OCLysFkeY3XwRY5vTHQ-xVI-gGKBY0A9gS70DH5t_pS53fBTQZ1pK667ct-BCo5aysuQHLcXrlE9coo8k8vtQKrmQ5-Fxp2ZNV_MKLY5yqBj5yAWJI5b_O-Mp5TyE9Zzyte_cTXqYtO14DAr1ev8TwqZP3YNbunHBcvIO20uVjbNxc4m--ARi5MMuxcDg4Kvju8Dbf3YKM? | ||
| 200 | +Trying to fetch pmid 34428301 | ||
| 201 | +Trying genericCitationLabelled | ||
| 202 | +Trying pubmed_central_v2 | ||
| 203 | +** fetching reprint using the 'pubmed central' finder... | ||
| 204 | +** fetching of reprint 34428301 succeeded | ||
| 205 | +Trying to fetch pmid 34791440 | ||
| 206 | +Trying genericCitationLabelled | ||
| 207 | +Trying pubmed_central_v2 | ||
| 208 | +** fetching reprint using the 'pubmed central' finder... | ||
| 209 | +** fetching of reprint 34791440 succeeded | ||
| 210 | +Trying to fetch pmid 9140061 | ||
| 211 | +Trying genericCitationLabelled | ||
| 212 | +Trying pubmed_central_v2 | ||
| 213 | +Trying acsPublications | ||
| 214 | +Trying uchicagoPress | ||
| 215 | +Trying nejm | ||
| 216 | +Trying futureMedicine | ||
| 217 | +Trying science_direct | ||
| 218 | +** fetching of reprint 9140061 failed from error Invalid URL 'jcZKP5wy_U5fZwQzMnXsls3TZuFKtLfM.TRt.Bh4d9k-1672941556-0-ARg_NFIhKoWkMSTkM9K0NNsEvXccNV6TpzvRoQ-2vYjK8XkBbOYBTbXDk7ayo2JehzHWQXv5Q_R2fac_6YNLMbXVLvOx_MPE2G55FZnUH5eyYoAVkc294_DbWF4BkOBr9bbRZ77KShHUYqJjAOi2O7mvSeGRhr8aCrq258YcVJ0FBdP8Q5tuy2CNWxi_udpInouGKC_Bnbb4D6LtrmOH2qchHRdKNei5ina55N2xPiH6jVDZ21jK0SkCSagtetSHnT7A-CfaFwqG5cz5lnOs1l1bBFEcOFdNNkmvz5yGZK-RR1-gynCmgS1ixfHapDjmCyogIfAxI1oumhPQoHCCg8-OqSgMSXHbgJdPWvc5L68Unmk5BAZNeFU2F_-xInoVtpYPwJNkeyldxj98PbHPAYg-SqmRtv0MyKm9qcEZJIULlfwTZ2ZGAm_uAwcQ7fW_O9VfUNBlbt2SohoYWfCtILAc2Imgon6vNbdisaxRkf70SZuD0G-Fj2SCsAYhkQrqPCdJAEEfWJ1QiddGb32kTSnXCoupFAWbX441Xj4nOj5OaRem_6JScd2AJp-YxSNI0Nm4IrB8s5O_lG1o_BDYlplFwbKozatP9ckn0jeXx38wInIuKOjUgl9B_T2Xvkg6sCNxXUWsHXiHMkhQ3x2AEh47zf4T6vQoTi0wNMkUVtkNTh8gOviKKl74Pi4m3yyq1ICnA9L9D6E6MLuE_ZOfmVBM79sVEgN8jsDBojevmYv96r09rQaQ_9c5cFZk7E25jQ': No schema supplied. Perhaps you meant http://jcZKP5wy_U5fZwQzMnXsls3TZuFKtLfM.TRt.Bh4d9k-1672941556-0-ARg_NFIhKoWkMSTkM9K0NNsEvXccNV6TpzvRoQ-2vYjK8XkBbOYBTbXDk7ayo2JehzHWQXv5Q_R2fac_6YNLMbXVLvOx_MPE2G55FZnUH5eyYoAVkc294_DbWF4BkOBr9bbRZ77KShHUYqJjAOi2O7mvSeGRhr8aCrq258YcVJ0FBdP8Q5tuy2CNWxi_udpInouGKC_Bnbb4D6LtrmOH2qchHRdKNei5ina55N2xPiH6jVDZ21jK0SkCSagtetSHnT7A-CfaFwqG5cz5lnOs1l1bBFEcOFdNNkmvz5yGZK-RR1-gynCmgS1ixfHapDjmCyogIfAxI1oumhPQoHCCg8-OqSgMSXHbgJdPWvc5L68Unmk5BAZNeFU2F_-xInoVtpYPwJNkeyldxj98PbHPAYg-SqmRtv0MyKm9qcEZJIULlfwTZ2ZGAm_uAwcQ7fW_O9VfUNBlbt2SohoYWfCtILAc2Imgon6vNbdisaxRkf70SZuD0G-Fj2SCsAYhkQrqPCdJAEEfWJ1QiddGb32kTSnXCoupFAWbX441Xj4nOj5OaRem_6JScd2AJp-YxSNI0Nm4IrB8s5O_lG1o_BDYlplFwbKozatP9ckn0jeXx38wInIuKOjUgl9B_T2Xvkg6sCNxXUWsHXiHMkhQ3x2AEh47zf4T6vQoTi0wNMkUVtkNTh8gOviKKl74Pi4m3yyq1ICnA9L9D6E6MLuE_ZOfmVBM79sVEgN8jsDBojevmYv96r09rQaQ_9c5cFZk7E25jQ? | ||
| 219 | +Trying to fetch pmid 32662815 | ||
| 220 | +Trying genericCitationLabelled | ||
| 221 | +Trying pubmed_central_v2 | ||
| 222 | +** fetching reprint using the 'pubmed central' finder... | ||
| 223 | +** fetching of reprint 32662815 succeeded | ||
| 224 | +Trying to fetch pmid 32817380 | ||
| 225 | +Trying genericCitationLabelled | ||
| 226 | +Trying pubmed_central_v2 | ||
| 227 | +Trying acsPublications | ||
| 228 | +Trying uchicagoPress | ||
| 229 | +Trying nejm | ||
| 230 | +Trying futureMedicine | ||
| 231 | +Trying science_direct | ||
| 232 | +** fetching of reprint 32817380 failed from error Invalid URL '12Kdq1Gu6s3H_c9wV7MWTwYP1d4sz7FRpWv08o2fehs-1672941561-0-AQzKoSKCDX92o3mW4XorQJa2qF1s8dsMn24r1239tDd-OxIEJ-xofWlfZb7cmDWmkZ-d4uCYOdMgyimJ9BwqBkuJKbRguJ_HaG4KzuT0CwTAflqmSgiP6oaZRbxRIMOl3LAnhQXawFffYKLbyEKG9hEWBeEbs31LlzwG7k7IbodBBPNfYicYC2QJy8RZ5xHWPTXxcwshhdG__QByEK9fJ6RYaR8LVhOwXo-m6nKcnmcvdFAubYorAvVvggpPCiIA0EYouK_-KA_Et9mXMtoRPVhEKeO03k9LAejSpvDDd8praPe4uYMGyBe4ruFtFbjqOdJgmlwSt_hPsHu_iFLkl6eW-V_dW5iwEQOE9z1jSjKf1ZHznUnde5Nzlh3v0wV2po1Y1QuFKuy8_IO-DB4iU3MlzHKgWqCsAeLorSaui7KqJAGzqmM3Keurq7J4URVd8khAGmHXMZHt3u96krRlFp3Nsc1_jwJEKKLxr44FVFla7XnqlQIHXdzj9FffjdPd1R_p3G-UEYGLzL32dFulkql4INTbOR625BrjoAvw74XDQRcNE_P72PYyCRUSIarPTtFTQBMSfpxRaOprcTZfMR_U5zdY0uGixU3srbPeduCUA7tQOFiCiLoTD_odsa75NYCv9o_me1vJSA823Md4hCV947suGwjybNaQP-R-yrffAfni7dQRYMt-mjEHk5LtnebhpJi1G44UN_WFSDpOkB6lvKO7Qc-eoUXnm4DbeysVDTRAmVi94HkcG9tc1U7BeSVyXNUfq3C1Vr_1jJXCgUI': No schema supplied. Perhaps you meant http://12Kdq1Gu6s3H_c9wV7MWTwYP1d4sz7FRpWv08o2fehs-1672941561-0-AQzKoSKCDX92o3mW4XorQJa2qF1s8dsMn24r1239tDd-OxIEJ-xofWlfZb7cmDWmkZ-d4uCYOdMgyimJ9BwqBkuJKbRguJ_HaG4KzuT0CwTAflqmSgiP6oaZRbxRIMOl3LAnhQXawFffYKLbyEKG9hEWBeEbs31LlzwG7k7IbodBBPNfYicYC2QJy8RZ5xHWPTXxcwshhdG__QByEK9fJ6RYaR8LVhOwXo-m6nKcnmcvdFAubYorAvVvggpPCiIA0EYouK_-KA_Et9mXMtoRPVhEKeO03k9LAejSpvDDd8praPe4uYMGyBe4ruFtFbjqOdJgmlwSt_hPsHu_iFLkl6eW-V_dW5iwEQOE9z1jSjKf1ZHznUnde5Nzlh3v0wV2po1Y1QuFKuy8_IO-DB4iU3MlzHKgWqCsAeLorSaui7KqJAGzqmM3Keurq7J4URVd8khAGmHXMZHt3u96krRlFp3Nsc1_jwJEKKLxr44FVFla7XnqlQIHXdzj9FffjdPd1R_p3G-UEYGLzL32dFulkql4INTbOR625BrjoAvw74XDQRcNE_P72PYyCRUSIarPTtFTQBMSfpxRaOprcTZfMR_U5zdY0uGixU3srbPeduCUA7tQOFiCiLoTD_odsa75NYCv9o_me1vJSA823Md4hCV947suGwjybNaQP-R-yrffAfni7dQRYMt-mjEHk5LtnebhpJi1G44UN_WFSDpOkB6lvKO7Qc-eoUXnm4DbeysVDTRAmVi94HkcG9tc1U7BeSVyXNUfq3C1Vr_1jJXCgUI? | ||
| 233 | +Trying to fetch pmid 32849447 | ||
| 234 | +Trying genericCitationLabelled | ||
| 235 | +Trying pubmed_central_v2 | ||
| 236 | +** fetching reprint using the 'pubmed central' finder... | ||
| 237 | +** fetching of reprint 32849447 succeeded | ||
| 238 | +Trying to fetch pmid 33068046 | ||
| 239 | +Trying genericCitationLabelled | ||
| 240 | +Trying pubmed_central_v2 | ||
| 241 | +Trying acsPublications | ||
| 242 | +Trying uchicagoPress | ||
| 243 | +Trying nejm | ||
| 244 | +Trying futureMedicine | ||
| 245 | +Trying science_direct | ||
| 246 | +** fetching of reprint 33068046 failed from error Invalid URL 'Ca2tz3FARdmZBsByuyNuWeiql2uee1VreT.kVjY7yrk-1672941567-0-AexEfZqeSZGpnAKuvb4N24mFbbARpMqS4Bl7rq2oJaJLQy4XNqEXY1SvQ53OXwzuh9s8hJpJSmZKZ90s8So4WTMitRZFt0iwKRvwq5PfF8ZF-spmYvUyZmqSAcRty7hyAnlIItHCbvd0DXymu2foqGLiY7_Azyn4oIZjqDWZgwUu4cttCsPTlTJtscKhrnIDiTC2AD-6BrcAHq2eFMQXn27imPIx1RCRlJshGeDr1vbtfjlBg89wEfvUQMpUEgz-xVlFP2tkES_AqE3RIqDBCDIDkDuwxhKZ5d-k_PxAuN3Vbx-1nlLI7WeIZH3b-qHkPWg8ifOx6RsMU_A02ZEHMrjlftm66SFQ60Wsria5dpTeLxvGd34BBngLodgDKaYoG0ztHkPImcz4lT76J7-QCgKcV7O86u_4mEpHhONMbCRBLtVhcFVAX-zAMIyOWzECJ6x0Sau9cAqssr2l_Q1VT-f4uCaFA5KpmuC3IHUZQABkrvM9nh0uOhB2e7ln9OfxBG89KhjhGPRhio2LRDY4yprcBdzS-dNl1pedPEXENepuOg0R645bq0poGP4uKeYHuQ': No schema supplied. Perhaps you meant http://Ca2tz3FARdmZBsByuyNuWeiql2uee1VreT.kVjY7yrk-1672941567-0-AexEfZqeSZGpnAKuvb4N24mFbbARpMqS4Bl7rq2oJaJLQy4XNqEXY1SvQ53OXwzuh9s8hJpJSmZKZ90s8So4WTMitRZFt0iwKRvwq5PfF8ZF-spmYvUyZmqSAcRty7hyAnlIItHCbvd0DXymu2foqGLiY7_Azyn4oIZjqDWZgwUu4cttCsPTlTJtscKhrnIDiTC2AD-6BrcAHq2eFMQXn27imPIx1RCRlJshGeDr1vbtfjlBg89wEfvUQMpUEgz-xVlFP2tkES_AqE3RIqDBCDIDkDuwxhKZ5d-k_PxAuN3Vbx-1nlLI7WeIZH3b-qHkPWg8ifOx6RsMU_A02ZEHMrjlftm66SFQ60Wsria5dpTeLxvGd34BBngLodgDKaYoG0ztHkPImcz4lT76J7-QCgKcV7O86u_4mEpHhONMbCRBLtVhcFVAX-zAMIyOWzECJ6x0Sau9cAqssr2l_Q1VT-f4uCaFA5KpmuC3IHUZQABkrvM9nh0uOhB2e7ln9OfxBG89KhjhGPRhio2LRDY4yprcBdzS-dNl1pedPEXENepuOg0R645bq0poGP4uKeYHuQ? | ||
| 247 | +Trying to fetch pmid 33072717 | ||
| 248 | +Trying genericCitationLabelled | ||
| 249 | +Trying pubmed_central_v2 | ||
| 250 | +** fetching reprint using the 'pubmed central' finder... | ||
| 251 | +** fetching of reprint 33072717 succeeded | ||
| 252 | +Trying to fetch pmid 33136147 | ||
| 253 | +Trying genericCitationLabelled | ||
| 254 | +** fetching reprint using the 'generic citation labelled' finder... | ||
| 255 | +** fetching of reprint 33136147 succeeded | ||
| 256 | +Trying to fetch pmid 33318048 | ||
| 257 | +Trying genericCitationLabelled | ||
| 258 | +Trying pubmed_central_v2 | ||
| 259 | +Trying acsPublications | ||
| 260 | +Trying uchicagoPress | ||
| 261 | +Trying nejm | ||
| 262 | +Trying futureMedicine | ||
| 263 | +Trying science_direct | ||
| 264 | +** fetching of reprint 33318048 failed from error Invalid URL 'H_HDOMjPFqBRBn9CIvFp3.MLyt6.Cr1yqjPqy7_.dNo-1672941585-0-ASGJyet_JMjh-n9RCjZP1usTaU-rAh_oVPlNBV8Ox06oZLjLmr4nLazOPGibTSzbDun4wRRfxjJD1cl8pFLvWgZNLCwgScfdMEuTEYcHelG8wh84ZPO7-PimWyY4a-Ax_JW2wfMsWOrdcFRmKRfdjpL4MFDyEGVMcjhzP9y84LW4EnDyNZqVSkX_y8VAxIbmaMeuS-EiSakyeV1RnV4_bKjzzuQXXLtk8wIhc-rF2VoAiFgTRP3-kR7Y02rLN1opo-OYhoQ29Xy2fAHIKm2pS-qBW0XNRWiOU6q8_YMmMZrWbskiukxzgyZO5MutUF8ygYDuzaZDjX0BtuezjJEtcKWslPbaM1gXj1L8Yy3U7YCwi-_CPUjNOrvFnW0EEm5jeKDUVwwIeY1-sd54wUjlnn86c6qAqpaI4unKjLk4makfoIUlKr4B62VwsTRnrfbZxbqDTyl5jZjFIGiHmrmzPXxt1QG7SrQwApYoGQYFiijEUTw-7IM3t7bXcwRYTMVfbXUEhv8JrzvShSa8x1fDEwHgU2fUnY6BoCOrpC9hZShy1xlZSOOpw4AHgCW272GoFIr2PJ-Zy1UNze2TXebaUegtUpleiM-BhsDhqCAaGxAj9SQsD153z7wtiM6kCMnHOz9IhaKIkgYpKYgXwcQmuzLZUWgWJFJ0lqYeSgvlKAgHjzRY_3Jt2gPT3L2GcgUZQXWWRx4Hs4jL2tUvAiOuqPfvPWSFVjGTZPjZCd3VVFrqpcZCh2v85PiksdgNk05aMA': No schema supplied. Perhaps you meant http://H_HDOMjPFqBRBn9CIvFp3.MLyt6.Cr1yqjPqy7_.dNo-1672941585-0-ASGJyet_JMjh-n9RCjZP1usTaU-rAh_oVPlNBV8Ox06oZLjLmr4nLazOPGibTSzbDun4wRRfxjJD1cl8pFLvWgZNLCwgScfdMEuTEYcHelG8wh84ZPO7-PimWyY4a-Ax_JW2wfMsWOrdcFRmKRfdjpL4MFDyEGVMcjhzP9y84LW4EnDyNZqVSkX_y8VAxIbmaMeuS-EiSakyeV1RnV4_bKjzzuQXXLtk8wIhc-rF2VoAiFgTRP3-kR7Y02rLN1opo-OYhoQ29Xy2fAHIKm2pS-qBW0XNRWiOU6q8_YMmMZrWbskiukxzgyZO5MutUF8ygYDuzaZDjX0BtuezjJEtcKWslPbaM1gXj1L8Yy3U7YCwi-_CPUjNOrvFnW0EEm5jeKDUVwwIeY1-sd54wUjlnn86c6qAqpaI4unKjLk4makfoIUlKr4B62VwsTRnrfbZxbqDTyl5jZjFIGiHmrmzPXxt1QG7SrQwApYoGQYFiijEUTw-7IM3t7bXcwRYTMVfbXUEhv8JrzvShSa8x1fDEwHgU2fUnY6BoCOrpC9hZShy1xlZSOOpw4AHgCW272GoFIr2PJ-Zy1UNze2TXebaUegtUpleiM-BhsDhqCAaGxAj9SQsD153z7wtiM6kCMnHOz9IhaKIkgYpKYgXwcQmuzLZUWgWJFJ0lqYeSgvlKAgHjzRY_3Jt2gPT3L2GcgUZQXWWRx4Hs4jL2tUvAiOuqPfvPWSFVjGTZPjZCd3VVFrqpcZCh2v85PiksdgNk05aMA? | ||
| 265 | +``` | ||
| 266 | + | ||
| 267 | +# Text extraction from PDF | ||
| 268 | +We sent the PDF files to the Lisen&Curate team for text extraction. | ||
| 269 | + |
| 1 | +21097887 | ||
| 2 | +23818864 | ||
| 3 | +24947454 | ||
| 4 | +25222563 | ||
| 5 | +25275371 | ||
| 6 | +25735747 | ||
| 7 | +26258987 | ||
| 8 | +26279566 | ||
| 9 | +26670385 | ||
| 10 | +26673755 | ||
| 11 | +28061857 | ||
| 12 | +28526842 | ||
| 13 | +29394395 | ||
| 14 | +30137486 | ||
| 15 | +30389436 | ||
| 16 | +30420454 | ||
| 17 | +33172971 | ||
| 18 | +34428301 | ||
| 19 | +34791440 | ||
| 20 | +9140061 | ||
| 21 | +32662815 | ||
| 22 | +32817380 | ||
| 23 | +32849447 | ||
| 24 | +33068046 | ||
| 25 | +33072717 | ||
| 26 | +33136147 | ||
| 27 | +33318048 |