import streamlit as st
import librosa
import torch
import spacy
import spacy_streamlit
import en_core_web_sm  # importing the model package ensures spacy.load('en_core_web_sm') can find it
import nltk
from nltk import tokenize
from transformers import (HubertForCTC, Wav2Vec2Processor,
                          AutoModelForSeq2SeqLM, AutoTokenizer, pipeline)

nltk.download('punkt')  # sentence tokenizer used by the Translate task
st.title('Audio-to-Text')
audio_file = st.file_uploader('Upload Audio', type=['wav', 'mp3', 'm4a'])
st.subheader('Please select any of the NLP tasks')
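
# Shared speech-to-text step used by the tasks below: the Wav2Vec2 processor turns
# the waveform into model inputs, the fine-tuned Hubert CTC model predicts per-frame
# character logits, and a greedy argmax decode recovers the transcript.
def transcribe(audio):
    processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
    model = HubertForCTC.from_pretrained("facebook/hubert-large-ls960-ft")
    # Resample to the 16 kHz rate the models were trained on.
    speech, rate = librosa.load(audio, sr=16000)
    input_values = processor(speech, return_tensors="pt", padding="longest",
                             sampling_rate=rate).input_values
    with torch.no_grad():
        logits = model(input_values).logits
    predicted_ids = torch.argmax(logits, dim=-1)
    return ' '.join(str(sentence) for sentence in processor.batch_decode(predicted_ids))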
if st.button('Audio Transcription'):
    if audio_file is not None:
        st.markdown(transcribe(audio_file))
    else:
        st.error('Please upload an audio file.')
if st.button('Summarize'):
    if audio_file is not None:
        result = transcribe(audio_file)
        # Summarize the transcript with BART fine-tuned on CNN/DailyMail.
        summarize = pipeline("summarization", model='facebook/bart-large-cnn')
        st.markdown(summarize(result)[0]['summary_text'])
    else:
        st.error('Please upload an audio file.')
if st.button('Sentiment Analysis'):
    if audio_file is not None:
        result = transcribe(audio_file)
        # The default sentiment-analysis pipeline labels the transcript.
        nlp_sa = pipeline("sentiment-analysis")
        st.write(nlp_sa(result))
    else:
        st.error('Please upload an audio file.')
if st.button('Audio Classification'):
    if audio_file is not None:
        # Emotion recognition works on the audio itself, not the transcript, so the
        # uploaded waveform is classified directly with the SUPERB ER model.
        speech, rate = librosa.load(audio_file, sr=16000)
        classifier = pipeline("audio-classification", model="superb/wav2vec2-base-superb-er")
        labels = classifier(speech, top_k=5)
        st.write(labels)
    else:
        st.error('Please upload an audio file.')
if st.button('Named Entity Recognition'):
    if audio_file is not None:
        result = transcribe(audio_file)
        # Run spaCy NER over the transcript and render the entities inline.
        nlp = spacy.load('en_core_web_sm')
        doc = nlp(result)
        spacy_streamlit.visualize_ner(doc, labels=nlp.get_pipe("ner").labels, title="List of Entities")
    else:
        st.error('Please upload an audio file.')
# T5 handles translation as text-to-text generation; the task is selected by a
# natural-language prefix built from the dropdowns below.
tokenizer = AutoTokenizer.from_pretrained("t5-base")

def load_model():
    model = AutoModelForSeq2SeqLM.from_pretrained("t5-base")
    return model

model1 = load_model()

st.subheader('Select your source and target language below.')
source_lang = st.selectbox("Source language", ['English'])
target_lang = st.selectbox("Target language", ['German', 'French'])
if st.button('Translate'):
    if audio_file is not None:
        result = transcribe(audio_file)
        # T5 expects a task prefix such as "translate English to German: ",
        # including the trailing ": " before each sentence.
        prefix = 'translate ' + str(source_lang) + ' to ' + str(target_lang) + ': '
        sentence_token = tokenize.sent_tokenize(result)
        output = tokenizer([prefix + sentence for sentence in sentence_token], padding=True, return_tensors="pt")
        # 512 tokens is ample for sentence-level translation.
        translated_id = model1.generate(output["input_ids"], attention_mask=output['attention_mask'], max_length=512)
        translated_word = tokenizer.batch_decode(translated_id, skip_special_tokens=True)
        st.subheader('Translated Text')
        st.write(' '.join(translated_word))
    else:
        st.error('Please upload an audio file.')
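
# To try the app locally (a minimal setup sketch; the package list is inferred from
# the imports above, and exact versions may matter for older Streamlit/transformers):
#   pip install streamlit librosa torch transformers spacy spacy-streamlit nltk
#   python -m spacy download en_core_web_sm
#   streamlit run app.py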