Asked by Xanta_Kross · Asked 10/24/2023 · Updated 10/24/2023 · Views: 48
Hugging Face: Unable to fine-tune Whisper using Seq2SeqTrainer on Kaggle
Q:
I copied this Kaggle notebook: https://www.kaggle.com/code/imtiazprio/fast-whisper-large-v2-fine-tuning-with-lora
Judging from its saved output, it has run successfully before. But when I copy the same notebook to my own account and run it, it shows me this error:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
Cell In[16], line 2
1 # with torch.autocast("cuda"):
----> 2 trainer.train()
3 trainer.save_model()
File /opt/conda/lib/python3.10/site-packages/transformers/trainer.py:1645, in Trainer.train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
1640 self.model_wrapped = self.model
1642 inner_training_loop = find_executable_batch_size(
1643 self._inner_training_loop, self._train_batch_size, args.auto_find_batch_size
1644 )
-> 1645 return inner_training_loop(
1646 args=args,
1647 resume_from_checkpoint=resume_from_checkpoint,
1648 trial=trial,
1649 ignore_keys_for_eval=ignore_keys_for_eval,
1650 )
File /opt/conda/lib/python3.10/site-packages/transformers/trainer.py:1938, in Trainer._inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
1935 self.control = self.callback_handler.on_step_begin(args, self.state, self.control)
1937 with self.accelerator.accumulate(model):
-> 1938 tr_loss_step = self.training_step(model, inputs)
1940 if (
1941 args.logging_nan_inf_filter
1942 and not is_torch_tpu_available()
1943 and (torch.isnan(tr_loss_step) or torch.isinf(tr_loss_step))
1944 ):
1945 # if loss is nan or inf simply add the average of previous logged losses
1946 tr_loss += tr_loss / (1 + self.state.global_step - self._globalstep_last_logged)
File /opt/conda/lib/python3.10/site-packages/transformers/trainer.py:2770, in Trainer.training_step(self, model, inputs)
2768 scaled_loss.backward()
2769 else:
-> 2770 self.accelerator.backward(loss)
2772 return loss.detach() / self.args.gradient_accumulation_steps
File /opt/conda/lib/python3.10/site-packages/accelerate/accelerator.py:1984, in Accelerator.backward(self, loss, **kwargs)
1982 return
1983 elif self.scaler is not None:
-> 1984 self.scaler.scale(loss).backward(**kwargs)
1985 else:
1986 loss.backward(**kwargs)
File /opt/conda/lib/python3.10/site-packages/torch/_tensor.py:487, in Tensor.backward(self, gradient, retain_graph, create_graph, inputs)
477 if has_torch_function_unary(self):
478 return handle_torch_function(
479 Tensor.backward,
480 (self,),
(...)
485 inputs=inputs,
486 )
--> 487 torch.autograd.backward(
488 self, gradient, retain_graph, create_graph, inputs=inputs
489 )
File /opt/conda/lib/python3.10/site-packages/torch/autograd/__init__.py:200, in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)
195 retain_graph = create_graph
197 # The reason we repeat same the comment below is that
198 # some Python versions print out the first line of a multi-line function
199 # calls in the traceback and some print out the last line
--> 200 Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
201 tensors, grad_tensors_, retain_graph, create_graph, inputs,
202 allow_unreachable=True, accumulate_grad=True)
File /opt/conda/lib/python3.10/site-packages/torch/autograd/function.py:274, in BackwardCFunction.apply(self, *args)
270 raise RuntimeError("Implementing both 'backward' and 'vjp' for a custom "
271 "Function is not allowed. You should only implement one "
272 "of them.")
273 user_fn = vjp_fn if vjp_fn is not Function.vjp else backward_fn
--> 274 return user_fn(self, *args)
File /opt/conda/lib/python3.10/site-packages/torch/utils/checkpoint.py:157, in CheckpointFunction.backward(ctx, *args)
153 if len(outputs_with_grad) == 0:
154 raise RuntimeError(
155 "none of output has requires_grad=True,"
156 " this checkpoint() is not necessary")
--> 157 torch.autograd.backward(outputs_with_grad, args_with_grad)
158 grads = tuple(inp.grad if isinstance(inp, torch.Tensor) else None
159 for inp in detached_inputs)
161 return (None, None) + grads
File /opt/conda/lib/python3.10/site-packages/torch/autograd/__init__.py:200, in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)
195 retain_graph = create_graph
197 # The reason we repeat same the comment below is that
198 # some Python versions print out the first line of a multi-line function
199 # calls in the traceback and some print out the last line
--> 200 Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
201 tensors, grad_tensors_, retain_graph, create_graph, inputs,
202 allow_unreachable=True, accumulate_grad=True)
File /opt/conda/lib/python3.10/site-packages/torch/autograd/function.py:274, in BackwardCFunction.apply(self, *args)
270 raise RuntimeError("Implementing both 'backward' and 'vjp' for a custom "
271 "Function is not allowed. You should only implement one "
272 "of them.")
273 user_fn = vjp_fn if vjp_fn is not Function.vjp else backward_fn
--> 274 return user_fn(self, *args)
File /opt/conda/lib/python3.10/site-packages/bitsandbytes/autograd/_functions.py:480, in MatMul8bitLt.backward(ctx, grad_output)
478 elif state.CB is not None:
479 CB = state.CB.to(ctx.dtype_A, copy=True).mul_(state.SCB.unsqueeze(1).mul(1.0 / 127.0))
--> 480 grad_A = torch.matmul(grad_output, CB).view(ctx.grad_shape).to(ctx.dtype_A)
481 elif state.CxB is not None:
482 CB = (
483 undo_layout(state.CxB, state.tile_indices)
484 .to(ctx.dtype_A)
485 .mul_(state.SCB.unsqueeze(1).mul(1.0 / 127.0))
486 )
RuntimeError: expected scalar type Half but found Float
Please give me a solution, or point me to somewhere other than GitHub where I can report this so that it gets fixed.
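For reference, the fine-tuning setup in the notebook follows the usual PEFT LoRA recipe for Whisper, roughly as sketched below. This is a reconstruction of the general approach, not the notebook's exact code; the dataset, processor, and data collator are built in earlier cells, so train_dataset and data_collator below are just placeholder names, and the hyperparameter values are illustrative.

from transformers import (
    Seq2SeqTrainer,
    Seq2SeqTrainingArguments,
    WhisperForConditionalGeneration,
)
from peft import LoraConfig, get_peft_model, prepare_model_for_int8_training

# Load Whisper in 8-bit so only the LoRA adapters are trained.
model = WhisperForConditionalGeneration.from_pretrained(
    "openai/whisper-large-v2", load_in_8bit=True, device_map="auto"
)
# Prepares the quantized model for training (e.g. enables input grads for
# gradient checkpointing and upcasts some layers to fp32).
model = prepare_model_for_int8_training(model)

lora_config = LoraConfig(
    r=32,
    lora_alpha=64,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
)
model = get_peft_model(model, lora_config)

training_args = Seq2SeqTrainingArguments(
    output_dir="whisper-large-v2-lora",
    per_device_train_batch_size=8,
    learning_rate=1e-3,
    warmup_steps=50,
    num_train_epochs=1,
    fp16=True,                    # mixed precision: the "Half" in the error
    gradient_checkpointing=True,  # checkpoint.py appears in the traceback
    remove_unused_columns=False,  # needed for PEFT-wrapped models
    label_names=["labels"],
)

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,  # placeholder: built in earlier notebook cells
    data_collator=data_collator,  # placeholder: built in earlier notebook cells
)
trainer.train()
trainer.save_model()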
A: No answers yet