-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathsvm-remap.py
More file actions
executable file
·64 lines (48 loc) · 1.97 KB
/
svm-remap.py
File metadata and controls
executable file
·64 lines (48 loc) · 1.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/usr/bin/env python3
"""Remap the SVM target (the first token of every line) from one numeric id
to another, matching ids by the category names listed in the from/to fields
files.
@author Dat Hoang
@date March 2011"""
import sys
import csv
from optparse import OptionParser
# Column separator of the two "fields" files read by gen_remap_table().
FIELD_DELIMITER = '\t'
# Token separator of SVM lines, used by svm_remap() for both parsing the
# input and re-joining the output.
SVM_DELIMITER = ' '
def gen_remap_table(from_fp, to_fp, *, delimiter=None):
    """Generate a hash table to remap category ids between SVM files.

    Each input is parsed as delimited text whose rows hold exactly two
    columns: a category id followed by the category name.  Ids read from
    ``from_fp`` are matched to ids read from ``to_fp`` through the name
    they share.  Blank lines in either input are ignored.

    Args:
        from_fp: Iterable of text lines describing the numbering to map from.
        to_fp: Iterable of text lines describing the numbering to map to.
        delimiter: Column separator.  ``None`` (the default) selects the
            module-level ``FIELD_DELIMITER``; it is looked up at call time.

    Returns:
        A dict mapping every id string found in ``from_fp`` to the id string
        that carries the same name in ``to_fp``.

    Raises:
        KeyError: A name listed in ``from_fp`` does not occur in ``to_fp``.
        ValueError: A non-blank row does not have exactly two columns.
    """
    if delimiter is None:
        delimiter = FIELD_DELIMITER

    def rows(fp):
        # csv.reader yields an empty list for a blank line; dropping those
        # keeps a trailing empty line from breaking the two-column unpack.
        return (row for row in csv.reader(fp, delimiter=delimiter) if row)

    id_to_name = {ident: name for ident, name in rows(from_fp)}
    name_to_id = {name: ident for ident, name in rows(to_fp)}

    remap_table = {}
    for ident, name in id_to_name.items():
        try:
            remap_table[ident] = name_to_id[name]
        except KeyError:
            # Same exception type as a plain dict lookup, but say which
            # entry of the from-file could not be matched.
            raise KeyError(
                "category %r (id %s) has no entry in the to-fields file"
                % (name, ident)) from None
    return remap_table
def svm_remap(remap_table, in_fp, out_fp, *, delimiter=None):
    """Remap the leading category of every SVM line according to a table.

    Each input line is split on the delimiter; the first token is replaced
    by its ``remap_table`` entry and the remaining tokens are written back
    unchanged, joined by the same delimiter.  Blank input lines are copied
    through as blank output lines.

    Args:
        remap_table: Mapping from input category string to output category.
        in_fp: Iterable of text lines to read.
        out_fp: Object with a ``write(str)`` method that receives the result.
        delimiter: Token separator.  ``None`` (the default) selects the
            module-level ``SVM_DELIMITER``; it is looked up at call time.

    Raises:
        KeyError: The category of a line is missing from ``remap_table``.
    """
    if delimiter is None:
        delimiter = SVM_DELIMITER

    # NOTE(review): csv.reader applies its default quote handling, so a
    # double-quoted token is unquoted on read and is not re-quoted on output.
    reader = csv.reader(in_fp, delimiter=delimiter)
    for tokens in reader:
        if not tokens:
            # csv.reader yields [] for a blank line; there is no category to
            # unpack, so keep the line count intact and move on.
            out_fp.write("\n")
            continue
        category, *features = tokens
        try:
            target = remap_table[category]
        except KeyError:
            raise KeyError(
                "line %d: category %r has no entry in the remap table"
                % (reader.line_num, category)) from None
        out_fp.write("%s%s%s\n" % (
            target, delimiter, delimiter.join(features)))
def main():
    """Command-line entry point: parse arguments and remap one SVM file.

    Expects four positional arguments: the from-fields file, the to-fields
    file, the input SVM file and the output SVM file.  A ``-`` given for the
    input or output SVM file selects the process's standard input or
    standard output respectively.

    Returns:
        0 on success, or 1 (after printing the usage to stderr) when the
        number of positional arguments is not four.
    """
    # Imported locally so the function does not depend on a file-level import.
    from contextlib import ExitStack

    parser = OptionParser(
        usage="""Usage: %prog <from-fields> <to-fields> <in-svm> <out-svm>
<from-fields> := file of fields to map from
<to-fields> := file of fields to map to
<in-svm> := input SVM file to remap ( - := /dev/stdin)
<out-svm> := output SVM file ( - := /dev/stdout)""")
    (_, args) = parser.parse_args()
    if len(args) != 4:
        parser.print_usage(file=sys.stderr)
        return 1

    from_fields, to_fields, svm_in, svm_out = args

    with open(from_fields, 'r') as from_fp, open(to_fields, 'r') as to_fp:
        remap_table = gen_remap_table(from_fp, to_fp)

    # Use the already-open standard streams for "-" instead of the
    # /dev/stdin and /dev/stdout paths, which are not available on every
    # platform.  Only files opened here are registered with the stack, so
    # the standard streams are never closed by this function.
    with ExitStack() as stack:
        if svm_in == '-':
            in_fp = sys.stdin
        else:
            in_fp = stack.enter_context(open(svm_in, 'r'))
        if svm_out == '-':
            out_fp = sys.stdout
        else:
            out_fp = stack.enter_context(open(svm_out, 'w'))
        svm_remap(remap_table, in_fp, out_fp)
    return 0
# When executed as a script, hand main()'s return value to the interpreter
# as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())