from sklearn.feature_selection import f_regression import numpy as np from sklearn import svm from sklearn import linear_model import svmcrossvalidate from array import array
# Main
def _read_float_rows(path):
    """Parse a whitespace-delimited text file into one list of floats per line."""
    with open(path) as handle:
        return [[float(token) for token in line.split()] for line in handle]


# Test samples: one single-precision ('f') array per line of testdata.txt.
testdata = [array('f', row) for row in _read_float_rows("testdata.txt")]

# Training samples, stored the same way as the test samples.
train = [array('f', row) for row in _read_float_rows("traindata.txt")]
# X is for train data
X = train

# Training labels: only the first value on each line of trueclass.txt is used.
trainlabels = [row[0] for row in _read_float_rows("trueclass.txt")]
y = trainlabels
# Score every feature column of X against the labels; element 0 of the
# result holds the per-feature F-scores.
f_output = f_regression(X, y, center=True)
fscores = f_output[0]

# Map each column index to its F-score, then order the indices from the
# highest-scoring feature to the lowest.
cols = len(X[0])
indices = list(range(cols))
fscores_dict = dict(enumerate(fscores))
# dict.__getitem__ is the lookup used as the sort key (a plain `getitem`
# attribute does not exist on dict and raises AttributeError).
sorted_indices = sorted(indices, key=fscores_dict.__getitem__, reverse=True)

# Report the 15 best-ranked feature indices.
print(sorted_indices[:15])
# Reduce both traindata and testdata to top 15 ranked features
def _keep_columns(matrix, wanted):
    """Return a copy of matrix holding only the columns whose index is in wanted.

    The width is taken from the first row, and the surviving columns keep
    their original left-to-right order (not the ranking order).
    """
    width = len(matrix[0])
    kept = [j for j in range(width) if j in wanted]
    return [[row[j] for j in kept] for row in matrix]


# Build the top-15 lookup once instead of re-slicing the ranking for every cell.
top_features = set(sorted_indices[:15])

rows = len(testdata)
cols = len(testdata[0])
print("testdata")
print(rows)
print(cols)
newtestdata = _keep_columns(testdata, top_features)

rows = len(train)
cols = len(train[0])
print("traindata")
print(rows)
print(cols)
newtrain = _keep_columns(train, top_features)
# Cross-validated linear SVM
# svmcrossvalidate is a project-local module; as used here, getbestC takes the
# reduced training matrix and the labels and returns a pair that is unpacked
# as (best C, cross-validation error for that C).
bestC, besterror = svmcrossvalidate.getbestC(newtrain, trainlabels)
print("Best C = ", bestC)
print("Best cross validation error = ", besterror)
# Predict labels of test data
# Train and predict on the reduced (top-15 feature) matrices: bestC was tuned
# on newtrain, so fitting on the full-width `train` would pair that C with a
# different feature space and leave the reduced test matrix unused.
clf = svm.LinearSVC(C=bestC, max_iter=100000)
clf.fit(newtrain, trainlabels)
prediction = clf.predict(newtestdata)

# One line per test sample: "<predicted label as int> <row index>".
with open("testlabel_prediction.txt", 'w') as f:
    for i, label in enumerate(prediction):
        f.write(str(int(label)) + " " + str(i) + "\n")