regex.py
1.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import fileinput
import os
import re
import sys
# Patterns are applied to every line in this order; each match is deleted.
# NOTE(review): inside the character classes `|` and `^` are literal
# characters, so the classes exclude them as well as the parentheses, and the
# square-bracket patterns exclude parentheses rather than brackets -- confirm
# that this is intended before tightening the expressions.
_REMOVAL_PATTERNS = (
    # ( ... <integer> ... )
    re.compile(r'[\(][^\(|^\)]*\s[0-9]+\s[^\(|^\)]*[\)]'),
    # ( ... <integer>.<integer> ... )
    re.compile(r'[\(][^\(|^\)]*\s[0-9]+\.[0-9]+\s[^\(|^\)]*[\)]'),
    # [ ... <integer> ... ]
    re.compile(r'[\[][^\(|^\)]*\s[0-9]+\s[^\(|^\)]*[\]]'),
    # [ ... <integer>.<integer> ... ]
    re.compile(r'[\[][^\(|^\)]*\s[0-9]+\.[0-9]+\s[^\(|^\)]*[\]]'),
    # ( ... "fig .", "figure", "e . g ." or "table" ... ), case-insensitive
    re.compile(
        r'[\(][^\(|^\)]*\s(fig\s\.|figure|e\s\.\sg\s\.|table)\s[^\(|^\)]*[\)]',
        flags=re.I,
    ),
)


def _sanitize_line(line):
    """Return *line* with every match of the removal patterns deleted."""
    for pattern in _REMOVAL_PATTERNS:
        # Trailing whitespace is stripped before each substitution, so only
        # whitespace exposed by the final pattern can survive in the result.
        line = pattern.sub('', line.rstrip())
    return line


def _output_path(requested):
    """Return *requested* with its extension (if any) replaced by ``.san``."""
    return os.path.splitext(requested)[0] + ".san"


def main(argv=None):
    """Sanitize the input file line by line and write the result to a .san file.

    Args:
        argv: Argument vector shaped like ``sys.argv`` (program name, input
            file, output file). Defaults to ``sys.argv``.

    Returns:
        2 when fewer than two file arguments are given (after printing the
        usage message to stderr), otherwise 0.
    """
    if argv is None:
        argv = sys.argv

    # Both the input and the output path are required.
    if len(argv) < 3:
        sys.stderr.write("E: usage: " + argv[0] + " <input_file> <output_file> \n")
        sys.stderr.flush()
        return 2
    print("Ok.")

    # Read the whole input file up front.
    with open(argv[1], "r") as text_file:
        dato = text_file.read().splitlines()

    # The output goes next to the requested name, with a ".san" extension.
    file_name = _output_path(argv[2])

    # Opening in "w" mode truncates any previous output, even for empty input.
    with open(file_name, "w") as save_file:
        save_file.writelines(_sanitize_line(line) + "\n" for line in dato)
    return 0


if __name__ == "__main__":
    sys.exit(main())