avoid duplicate generate args
Browse files — modeling_minicpmo.py (+1, -1)
modeling_minicpmo.py
CHANGED
|
@@ -636,6 +636,7 @@ class MiniCPMO(MiniCPMOPreTrainedModel):
|
|
| 636 |
return self.llm(input_ids=None, position_ids=position_ids, inputs_embeds=vllm_embedding, **kwargs)
|
| 637 |
|
| 638 |
def _decode(self, inputs_embeds, tokenizer, attention_mask, **kwargs):
|
|
|
|
| 639 |
terminators = [tokenizer.convert_tokens_to_ids(i) for i in self.terminators]
|
| 640 |
outputs = self.llm.generate(
|
| 641 |
inputs_embeds=inputs_embeds,
|
|
@@ -649,7 +650,6 @@ class MiniCPMO(MiniCPMOPreTrainedModel):
|
|
| 649 |
return outputs
|
| 650 |
|
| 651 |
def _decode_stream(self, inputs_embeds, tokenizer, **kwargs):
|
| 652 |
-
kwargs.pop("output_hidden_states", None)
|
| 653 |
terminators = [tokenizer.convert_tokens_to_ids(i) for i in self.terminators]
|
| 654 |
streamer = TextIteratorStreamer(tokenizer=tokenizer)
|
| 655 |
generation_kwargs = {
|
|
|
|
| 636 |
return self.llm(input_ids=None, position_ids=position_ids, inputs_embeds=vllm_embedding, **kwargs)
|
| 637 |
|
| 638 |
def _decode(self, inputs_embeds, tokenizer, attention_mask, **kwargs):
|
| 639 |
+
kwargs.pop("output_hidden_states", None)
|
| 640 |
terminators = [tokenizer.convert_tokens_to_ids(i) for i in self.terminators]
|
| 641 |
outputs = self.llm.generate(
|
| 642 |
inputs_embeds=inputs_embeds,
|
|
|
|
| 650 |
return outputs
|
| 651 |
|
| 652 |
def _decode_stream(self, inputs_embeds, tokenizer, **kwargs):
|
|
|
|
| 653 |
terminators = [tokenizer.convert_tokens_to_ids(i) for i in self.terminators]
|
| 654 |
streamer = TextIteratorStreamer(tokenizer=tokenizer)
|
| 655 |
generation_kwargs = {
|