Page Menu · Home · c4science

sentfeat.py
No One · Temporary

File Metadata

Created
Sun, Feb 23, 22:03

sentfeat.py

import numpy as np
# Text files processed by save_sentence_feature(); each one is passed to
# sentence_features() and yields one "<stem>_feature" output.
files = [
    "pre_test_data.txt",
    "pre_train_neg_full.txt",
    "pre_train_pos_full.txt",
]
def sentence_features(name):
    """Build one feature vector per line of a text file by summing word embeddings.

    Each line of ``name`` is treated as a sentence and split on whitespace.
    Every word that appears in ``vocab_cut.txt`` contributes the row of
    ``embeddings.npy`` at that word's line position in the vocabulary; words
    that are not in the vocabulary are ignored, so a sentence without any
    known word keeps an all-zero vector.

    Args:
        name: Path of the text file to process, one sentence per line.

    Returns:
        A tuple ``(sentence_vec, name)`` where ``sentence_vec`` is an array of
        shape ``(number_of_lines, embedding_dim)`` and ``name`` is the path
        that was passed in.
    """
    # Context managers close the handles promptly instead of leaking them.
    with open(name) as sentence_file:
        sentences = sentence_file.read().splitlines()
    with open("vocab_cut.txt", "r") as vocab_file:
        vocab = vocab_file.read().splitlines()
    print(len(vocab))
    word_vec = np.load("embeddings.npy")
    print(word_vec.shape)

    # Map every word to its FIRST position in the vocabulary (the position
    # list.index would return) so each lookup is O(1) instead of a scan of
    # the whole vocabulary list for every word of every sentence.
    word_index = {}
    for position, vocab_word in enumerate(vocab):
        word_index.setdefault(vocab_word, position)

    total = len(sentences)
    embedding_dim = word_vec.shape[1]
    sentence_vec = np.zeros((total, embedding_dim))
    for i, sentence in enumerate(sentences):
        summed = np.zeros(embedding_dim)
        # Words are separated by whitespace.
        for word in sentence.split():
            position = word_index.get(word)
            if position is not None:
                summed = summed + word_vec[position]
        sentence_vec[i] = summed
        if i % 1000 == 0:
            # Progress report every 1000 sentences.
            print(str(i) + "/" + str(total))
    return sentence_vec, name
def lookup_word_vektor(vocab, word_vec, word, temp):
    """Add the embedding of ``word`` to the running sum ``temp``.

    Args:
        vocab: Sequence of vocabulary words supporting ``.index`` (e.g. a
            list); the position of a word selects its row in ``word_vec``.
        word_vec: Array of embeddings indexed by vocabulary position.
        word: The word to look up.
        temp: The vector accumulated so far.

    Returns:
        ``temp + word_vec[i]`` as a new array, where ``i`` is the first
        position of ``word`` in ``vocab``; ``temp`` itself, unchanged, when
        the word is not in the vocabulary.
    """
    # A single .index call replaces the former "in" test followed by .index,
    # which scanned the vocabulary twice for every known word.
    try:
        index = vocab.index(word)
    except ValueError:
        return temp
    return temp + word_vec[index]
def save_sentence_feature():
    """Compute and store the sentence features for every entry of ``files``.

    For each file name the feature matrix returned by ``sentence_features``
    is written with ``np.save`` to ``<name without its last 4 characters>_feature``
    (``np.save`` appends the ``.npy`` extension).

    NOTE(review): previously the whole ``(matrix, name)`` tuple was handed to
    ``np.save``. That cannot be stored as a regular array (it is either
    pickled as an object array or rejected as an inhomogeneous shape,
    depending on the NumPy version), so only the matrix is saved now. Any
    loader that indexed element ``[0]`` of the old output must be adjusted.
    """
    for filename in files:
        # sentence_features returns (matrix, name); only the matrix is data.
        features, _ = sentence_features(filename)
        # filename[:-4] drops the 4-character ".txt" extension.
        np.save(filename[:-4] + "_feature", features)
# Script entry point: generate the feature files only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    save_sentence_feature()

Event Timeline