"""Score stored headlines with a pickled classifier.

The script loads a ``[classifier, word_features]`` pair from
``classifier.pkl``, walks every distinct feed in the ``headlines`` table and
writes the classifier's probability for the "positive" label into the
``sentProb`` column of each headline row.
"""
import pickle
import string
from xml.dom import minidom

import nltk
import psycopg2

conn = psycopg2.connect("dbname=daniel user=daniel")
cur = conn.cursor()


def get_word_features(wordlist):
    """Return the keys of an ``nltk.FreqDist`` built from *wordlist*.

    Not called by the code visible in this script.
    NOTE(review): the ordering and concrete type of the returned keys depend
    on the installed NLTK / Python version -- confirm before relying on them.
    """
    return nltk.FreqDist(wordlist).keys()


def extract_features(document):
    """Build the feature dict for one tokenized document.

    For every word in the module-level ``word_features`` the result holds a
    ``'contains(<word>)'`` key whose value is True when that word occurs in
    *document* and False otherwise.
    """
    # A set makes each membership test O(1) instead of rescanning the tokens.
    document_words = set(document)
    features = {}
    for word in word_features:
        features['contains(%s)' % word] = (word in document_words)
    return features


# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# run this against a classifier.pkl produced by a trusted training step.
with open('classifier.pkl', 'rb') as inputFile:
    # The context manager closes the file even if unpickling fails.
    pickleData = pickle.load(inputFile)
classifier = pickleData[0]
word_features = pickleData[1]
print("pickled classifier loaded.!")

try:
    cur.execute("select distinct feed from headlines")
    # fetchall() materializes the feed rows up front, so reusing the same
    # cursor for the queries below does not disturb this iteration.
    for feed in cur.fetchall():
        print(feed)
        # `feed` is a one-column result row, so it is passed directly as the
        # parameter sequence for the single %s placeholder.
        cur.execute("select headline, id from headlines where feed = %s", feed)
        for text, headline_id in cur.fetchall():
            dist = classifier.prob_classify(extract_features(text.split()))
            cur.execute(
                "update headlines set sentProb = %s where id = %s",
                (dist.prob("positive"), headline_id),
            )
            # Debug aid:
            # print(classifier.classify(extract_features(text.split())),
            #       dist.prob("positive"), " - ", text)
            # for label in dist.samples():
            #     print("%s: %f" % (label, dist.prob(label)))
        # Commit once per feed so finished feeds stay stored even if a later
        # feed fails.
        conn.commit()

    conn.commit()
finally:
    # Release the cursor and connection on both the success and error paths.
    cur.close()
    conn.close()