#!/usr/bin/env python3
# coding=utf-8
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for Blenderbot Tokenizers, including common tests for BlenderbotSmallTokenizer."""
import unittest

from transformers.file_utils import cached_property
from transformers.models.blenderbot.tokenization_blenderbot import BlenderbotTokenizer


class Blenderbot3BTokenizerTests(unittest.TestCase):
    @cached_property
    def tokenizer_3b(self):
        return BlenderbotTokenizer.from_pretrained("facebook/blenderbot-3B")

    def test_encode_decode_cycle(self):
        tok = self.tokenizer_3b
        src_text = " I am a small frog."
        encoded = tok([src_text], padding=False, truncation=False)["input_ids"]
        decoded = tok.batch_decode(encoded, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
        assert src_text == decoded

    def test_3B_tokenization_same_as_parlai(self):
        assert self.tokenizer_3b.add_prefix_space
        assert self.tokenizer_3b([" Sam", "Sam"]).input_ids == [[5502, 2], [5502, 2]]
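

# A minimal sketch of how these tests can be run locally (assumptions: the
# `transformers` package is installed, the commands are run from the directory
# containing this file, and the "facebook/blenderbot-3B" checkpoint can be
# downloaded from the Hugging Face Hub on first use):
#
#     python -m unittest test_tokenization_blenderbot
#
# or, with pytest available:
#
#     python -m pytest test_tokenization_blenderbot.py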