Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F120760062
sentfeat.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sun, Jul 6, 20:33
Size
1 KB
Mime Type
text/x-python
Expires
Tue, Jul 8, 20:33 (2 d)
Engine
blob
Format
Raw Data
Handle
27244363
Attached To
rTZUCT ML_Project1
sentfeat.py
View Options
import
numpy
as
np
# Text files that save_sentence_feature() turns into feature arrays,
# one sentence per line in each file.
files = [
    "pre_test_data.txt",
    "pre_train_neg_full.txt",
    "pre_train_pos_full.txt",
]
def sentence_features(name):
    """Build one feature vector per line of a text file by summing word embeddings.

    Each line of ``name`` is split on whitespace. Every token that occurs in
    ``vocab_cut.txt`` contributes the row of ``embeddings.npy`` found at the
    token's (first) line position in the vocabulary file. Tokens that are not
    in the vocabulary are ignored, so a line without any known token yields
    an all-zero vector.

    Both ``vocab_cut.txt`` and ``embeddings.npy`` are read from the current
    working directory. The vocabulary size, the embedding shape and a
    progress line every 1000 sentences are printed to stdout.

    Parameters
    ----------
    name : str
        Path of the text file to featurize, one sentence per line.

    Returns
    -------
    tuple
        ``(sentence_vec, name)``: ``sentence_vec`` is a float array of shape
        ``(number_of_lines, word_vec.shape[1])``; ``name`` is the argument
        handed back unchanged.
    """
    # Context managers close the files deterministically instead of relying
    # on garbage collection.
    with open(name) as sentence_file:
        sentences = sentence_file.read().splitlines()
    with open("vocab_cut.txt", "r") as vocab_file:
        vocab = vocab_file.read().splitlines()
    print(len(vocab))

    word_vec = np.load("embeddings.npy")
    print(word_vec.shape)

    # Build the word -> row lookup once. setdefault keeps the FIRST position
    # of a duplicated word, which is what list.index() would return, while
    # making each token lookup O(1) instead of a scan over the whole list.
    word_index = {}
    for position, word in enumerate(vocab):
        word_index.setdefault(word, position)

    total = len(sentences)
    dim = word_vec.shape[1]
    sentence_vec = np.zeros((total, dim))
    for i, sentence in enumerate(sentences):
        row = np.zeros(dim)
        # Tokens are separated by whitespace.
        for token in sentence.split():
            position = word_index.get(token)
            if position is not None:
                row = row + word_vec[position]
        sentence_vec[i] = row
        if i % 1000 == 0:
            print(str(i) + "/" + str(total))
    return sentence_vec, name
def lookup_word_vektor(vocab, word_vec, word, temp):
    """Add the embedding of ``word`` to ``temp`` if the word is in ``vocab``.

    Parameters
    ----------
    vocab : list
        Vocabulary words; the position of a word selects its row in
        ``word_vec``. For a duplicated word the first position is used.
    word_vec : array
        Embeddings, indexed by vocabulary position.
    word : str
        Word to look up.
    temp : array
        Running sum to extend. It is never modified in place.

    Returns
    -------
    array
        A new array ``temp + word_vec[index]`` when ``word`` is found,
        otherwise the ``temp`` object itself.
    """
    # A single index() call replaces the former "in" test followed by
    # index(), which scanned the vocabulary twice for every known word.
    try:
        index = vocab.index(word)
    except ValueError:
        return temp
    return temp + word_vec[index]
def save_sentence_feature():
    """Featurize every file listed in ``files`` and save the result with ``np.save``.

    For an input ``<stem>.txt`` the output is written to ``<stem>_feature``
    (``np.save`` appends ``.npy``). The saved object is a two-element object
    array: element 0 is the feature matrix and element 1 is the input file
    name, i.e. the pair returned by ``sentence_features``.

    Because the array has object dtype it is stored pickled; readers must
    call ``np.load(..., allow_pickle=True)``.
    """
    for filename in files:
        features, source_name = sentence_features(filename)
        # Passing the (ndarray, str) tuple straight to np.save makes NumPy
        # convert a ragged sequence, which raises ValueError on NumPy >= 1.24.
        # Building the object array explicitly works on current NumPy.
        # NOTE(review): presumably this matches the layout older NumPy
        # produced implicitly - confirm against any existing readers.
        payload = np.empty(2, dtype=object)
        payload[0] = features
        payload[1] = source_name
        # filename[:-4] strips the 4-character ".txt" suffix.
        np.save(filename[:-4] + "_feature", payload)
# Script entry point: when run directly (not imported), featurize and save
# every configured input file.
if __name__ == "__main__":
    save_sentence_feature()
Event Timeline
Log In to Comment