Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F60636900
test_generation_utils.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Wed, May 1, 15:25
Size
3 KB
Mime Type
text/x-python
Expires
Fri, May 3, 15:25 (2 d)
Engine
blob
Format
Raw Data
Handle
17387927
Attached To
R11484 ADDI
test_generation_utils.py
View Options
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
random
import
unittest
import
timeout_decorator
from
..file_utils
import
cached_property
,
is_torch_available
from
..testing_utils
import
require_torch
if
is_torch_available
():
import
torch
from
..models.marian
import
MarianConfig
,
MarianMTModel
@require_torch
class GenerationUtilsTest(unittest.TestCase):
    """Tests for ``postprocess_next_token_scores`` (bad-words / EOS masking).

    Uses a tiny Marian model so the test stays fast; the model weights are
    random, only the post-processing of the score tensor is under test.
    """

    @cached_property
    def config(self):
        """Tiny Marian config, fetched once and cached on the instance."""
        return MarianConfig.from_pretrained("sshleifer/tiny-marian-en-de")

    @cached_property
    def model(self):
        """Randomly initialized tiny Marian model built from ``self.config``."""
        return MarianMTModel(self.config)

    def test_postprocess_next_token_scores(self):
        """Scores of banned (batch_index, token_id) positions must be -inf.

        Each entry of ``bad_words_ids_test_cases`` is paired element-wise with
        the list of positions in ``masked_scores`` that must end up masked.
        """
        config = self.config
        model = self.model

        # Input ids: batch size 8, sequence length 12, tokens 0..95
        # (row i holds tokens 12*i .. 12*i+11).
        input_ids = torch.arange(0, 96, 1).view((8, 12))
        eos = config.eos_token_id

        bad_words_ids_test_cases = [[[299]], [[23, 24], [54]], [[config.eos_token_id]], []]
        masked_scores = [
            # Single-token bad word 299: banned for every batch row.
            [(0, 299), (1, 299), (2, 299), (3, 299), (4, 299), (5, 299), (6, 299), (7, 299)],
            # Bigram [23, 24]: only row 1 ends with 23 (tokens 12..23), so 24
            # is banned there; single-token bad word 54 is banned everywhere.
            [(1, 24), (0, 54), (1, 54), (2, 54), (3, 54), (4, 54), (5, 54), (6, 54), (7, 54)],
            # Banning the EOS token masks it for every row.
            [(0, eos), (1, eos), (2, eos), (3, eos), (4, eos), (5, eos), (6, eos), (7, eos)],
            # Empty bad-words list: nothing is masked.
            [],
        ]

        for test_case_index, bad_words_ids in enumerate(bad_words_ids_test_cases):
            # Fresh random scores per case: batch size 8, vocabulary size 300.
            scores = torch.rand((8, 300))
            # NOTE(review): the positional constants (0, 13, 15, 32, 5) mirror
            # the original call; their parameter names are not visible from
            # this file — confirm against the model's signature before reuse.
            output = model.postprocess_next_token_scores(
                scores,
                input_ids,
                0,
                bad_words_ids,
                13,
                15,
                config.max_length,
                config.eos_token_id,
                config.repetition_penalty,
                32,
                5,
            )
            for batch_idx, token_id in masked_scores[test_case_index]:
                # assertEqual reports the actual score on failure, unlike the
                # original assertTrue(... == ...) which only prints
                # "False is not true".
                self.assertEqual(output[batch_idx, token_id], -float("inf"))

    @timeout_decorator.timeout(10)
    def test_postprocess_next_token_scores_large_bad_words_list(self):
        """Smoke test: a large random bad-words list must finish within 10s."""
        config = self.config
        model = self.model

        # Input ids: batch size 8, sequence length 12.
        input_ids = torch.arange(0, 96, 1).view((8, 12))

        # 100 random bad words, each 1-4 tokens long, token ids in [1, 300).
        bad_words_ids = []
        for _ in range(100):
            length_bad_word = random.randint(1, 4)
            bad_words_ids.append(random.sample(range(1, 300), length_bad_word))

        # Random scores: batch size 8, vocabulary size 300. Only termination
        # within the timeout is checked, not the resulting values.
        scores = torch.rand((8, 300))
        _ = model.postprocess_next_token_scores(
            scores,
            input_ids,
            0,
            bad_words_ids,
            13,
            15,
            config.max_length,
            config.eos_token_id,
            config.repetition_penalty,
            32,
            5,
        )
Event Timeline
Log In to Comment