Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F102759580
sentfeat.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sun, Feb 23, 22:03
Size
1 KB
Mime Type
text/x-python
Expires
Tue, Feb 25, 22:03 (1 d, 23 h)
Engine
blob
Format
Raw Data
Handle
24417083
Attached To
rTZUCT ML_Project1
sentfeat.py
View Options
import
numpy
as
np
# Input text files whose sentences are turned into feature vectors.
files = [
    "pre_test_data.txt",
    "pre_train_neg_full.txt",
    "pre_train_pos_full.txt",
]
def sentence_features(name):
    """Build one feature vector per line of ``name`` by summing word embeddings.

    The vocabulary is read from ``vocab_cut.txt`` (one word per line) and the
    embedding matrix from ``embeddings.npy``; both are opened relative to the
    current working directory. Row ``k`` of the embedding matrix is used as
    the vector of the ``k``-th vocabulary line. Each line of ``name`` is split
    on whitespace and the vectors of all in-vocabulary words are added up.
    Words that are not in the vocabulary are ignored, so a line without any
    known word yields an all-zero row.

    The vocabulary size, the embedding matrix shape and a progress line every
    1000 sentences are printed to stdout.

    Args:
        name: Path of the text file holding one sentence per line.

    Returns:
        A tuple ``(sentence_vec, name)`` where ``sentence_vec`` is a float
        array of shape ``(number_of_lines, word_vec.shape[1])`` and ``name``
        is the path that was passed in.
    """
    # Context managers make sure both files are closed, even on errors.
    with open(name) as sentence_file:
        sentences = sentence_file.read().splitlines()
    with open("vocab_cut.txt", "r") as vocab_file:
        vocab = vocab_file.read().splitlines()
    print(len(vocab))

    word_vec = np.load("embeddings.npy")
    print(word_vec.shape)

    # Map every word to its position in the vocabulary once, so each lookup
    # below is a dict access instead of two scans over the whole list.
    # setdefault keeps the first occurrence, which is what list.index() finds.
    word_to_row = {}
    for row, vocab_word in enumerate(vocab):
        word_to_row.setdefault(vocab_word, row)

    total = len(sentences)
    width = word_vec.shape[1]
    sentence_vec = np.zeros((total, width))
    for i, sentence in enumerate(sentences):
        sentence_sum = np.zeros(width)
        # Words are separated by whitespace.
        for word in sentence.split():
            row = word_to_row.get(word)
            if row is not None:
                sentence_sum = sentence_sum + word_vec[row]
        sentence_vec[i] = sentence_sum
        # Progress report every 1000 sentences.
        if i % 1000 == 0:
            print(str(i) + "/" + str(total))
    return sentence_vec, name
def lookup_word_vektor(vocab, word_vec, word, temp):
    """Return ``temp`` plus the embedding of ``word`` when the word is known.

    Args:
        vocab: Sequence of vocabulary words; the position of a word selects
            its row in ``word_vec``.
        word_vec: Embedding matrix indexed by vocabulary position.
        word: The word to look up.
        temp: Running sum to which the word's vector is added.

    Returns:
        ``temp + word_vec[position]`` when ``word`` occurs in ``vocab`` (the
        first occurrence is used); otherwise ``temp`` itself, unchanged.
    """
    # Unknown words contribute nothing to the sum.
    if word not in vocab:
        return temp
    position = vocab.index(word)
    return temp + word_vec[position]
def save_sentence_feature():
    """Compute and store the sentence features of every file in ``files``.

    For each input file the ``(feature_matrix, filename)`` pair returned by
    ``sentence_features`` is written with ``np.save``. The output name is the
    input name with its last four characters removed plus ``"_feature"``
    (``np.save`` appends the ``.npy`` suffix).

    The pair is stored as a two-element object array, so reading it back
    requires ``np.load(..., allow_pickle=True)``.
    """
    for filename in files:
        the_sentence_feature = sentence_features(filename)
        # The result is a (2-D array, str) pair. Handing that tuple to np.save
        # directly relies on implicit ragged-array creation, which NumPy >= 1.24
        # rejects with ValueError. Packing it into an explicit object array
        # keeps the same two-element layout on every NumPy version.
        packed = np.empty(len(the_sentence_feature), dtype=object)
        for slot, item in enumerate(the_sentence_feature):
            packed[slot] = item
        np.save(filename[:-4] + "_feature", packed)
# Script entry point: when run directly, compute and save the sentence
# features for every file listed in `files`.
if __name__ == '__main__':
    save_sentence_feature()
    # Disabled call left in place from the original source:
    #ce_feature()
Event Timeline
Log In to Comment