| | |
| | |
| |
|
| | from transformers import AutoModel, AutoConfig |
| | from huggingface_hub import hf_hub_download |
| | import torch |
| | import numpy as np |
| | import importlib.util |
| |
|
def _load_remote_module(repo_id, filename, module_name):
    """Download a single .py file from a Hub repo and import it as a module.

    Args:
        repo_id: HuggingFace Hub repository id, e.g. "videoloc/seamless-basic".
        filename: Name of the Python file to fetch from the repo.
        module_name: Name to register the dynamically-imported module under.

    Returns:
        The executed module object.

    NOTE(security): this executes code fetched from the remote repository —
    only use with repos you trust.
    """
    path = hf_hub_download(repo_id=repo_id, filename=filename)
    spec = importlib.util.spec_from_file_location(module_name, path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module


def load_model_and_collator():
    """Load the seamless-basic TTE model and its matching data collator.

    Both the model class and the collator class live as standalone .py files
    in the "videoloc/seamless-basic" Hub repo, so each is imported dynamically.

    Returns:
        tuple: (model, data_collator) — an HFSeamlessBasic model loaded with
        pretrained weights, and a DataCollatorSimpleSeamless that batches
        raw audio/text examples for it.
    """
    repo_id = "videoloc/seamless-basic"

    # Model definition + pretrained weights. (Config is loaded internally by
    # from_pretrained; no separate AutoConfig/SeamlessBasicConfig call needed.)
    modeling_module = _load_remote_module(
        repo_id, "modeling_seamless_basic.py", "modeling_seamless_basic"
    )
    model = modeling_module.HFSeamlessBasic.from_pretrained(repo_id)

    # Collator definition; the processor string selects the matching
    # SeamlessM4T feature extractor/tokenizer.
    collator_module = _load_remote_module(repo_id, "data_collator.py", "data_collator")
    data_collator = collator_module.DataCollatorSimpleSeamless(
        processor="facebook/hf-seamless-m4t-medium",
        max_audio_length_sec=8.0,
        max_text_length=256,
    )

    return model, data_collator
|
def example_inference():
    """Run one demo prediction and print the estimated Time To Edit (TTE).

    Builds a single synthetic example (3 s of random audio at 16 kHz plus a
    short caption), batches it with the collator, and runs the model once
    under no_grad.

    Returns:
        float: the scalar TTE prediction in seconds.
    """
    model, collator = load_model_and_collator()

    # One synthetic example; the collator expects a list of dicts.
    sample = {
        'raw_audio': np.random.randn(16000 * 3),
        'raw_text': "Hello, welcome to our presentation today.",
    }
    batch = collator([sample])

    model.eval()
    with torch.no_grad():
        outputs = model(**batch)

    # Single-example batch -> logits holds one scalar prediction.
    tte_prediction = outputs.logits.item()

    print(f"Predicted Time To Edit (TTE): {tte_prediction:.2f} seconds")
    return tte_prediction
|
| | if __name__ == "__main__": |
| | example_inference() |
| |
|