Skip to content

Commit

Permalink
fix: fast tokenizer conversion should happen offline
Browse files: browse the repository at this point in the history
Signed-off-by: Travis Johnson <[email protected]>
  • Loading branch information
tjohnson31415 committed Jul 30, 2024
1 parent 5b5938e commit 4569285
Showing 1 changed file with 16 additions and 11 deletions.
27 changes: 16 additions & 11 deletions launcher/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -870,19 +870,24 @@ fn save_fast_tokenizer(
  info!("Saving fast tokenizer for `{model_name}` to `{save_path}`");
  let model_name = model_name.escape_default();
  let revision = revision.map(|v| v.escape_default());
- let code = if let Some(revision) = revision {
- format!(
- "from transformers import AutoTokenizer; \
- AutoTokenizer.from_pretrained(\"{model_name}\", \
- revision=\"{revision}\", local_files_only=True).save_pretrained(\"{save_path}\")"
- )
+ let revision_arg = if let Some(revision) = revision {
+ format!("revision=\"{revision}\", ")
  } else {
- format!(
- "from transformers import AutoTokenizer; \
- AutoTokenizer.from_pretrained(\"{model_name}\").save_pretrained(\"{save_path}\")"
- )
+ "".to_string()
  };
- match Command::new("python").args(["-c", &code]).status() {
+ let code = format!(
+ "from transformers import AutoTokenizer; \
+ AutoTokenizer.from_pretrained( \
+ \"{model_name}\", \
+ {revision_arg} \
+ local_files_only=True \
+ ).save_pretrained(\"{save_path}\")"
+ );
+ match Command::new("python")
+ .args(["-c", &code])
+ .env("HF_HUB_OFFLINE", "1")
+ .status()
+ {
  Ok(status) => {
  if status.success() {
  Ok(())
Expand Down

0 comments on commit 4569285

Please sign in to comment.