input_ids=torch.tensor([[14,447]],dtype=torch.long,device=torch_device)# the president
expected_output_ids=[
14,
447,
14,
447,
14,
447,
14,
447,
14,
447,
14,
447,
14,
447,
14,
447,
14,
447,
14,
447,
]# the president the president the president the president the president the president the president the president the president the president
# TODO(PVP): this and other input_ids I tried for generation give pretty bad results. Not sure why. Model might just not be made for auto-regressive inference