Spaces:
Runtime error
Runtime error
| import torch | |
| import numpy as np | |
# Length threshold, in characters, that is_user_query_valid() compares a
# user-typed query against.
MAX_USER_QUERY_LEN = 35

# Example queries, keyed by a label (presumably shown in the UI -- confirm
# against the caller). Some of these texts are longer than MAX_USER_QUERY_LEN;
# is_user_query_valid() checks membership in this dict separately from the
# length check.
DEFAULT_QUERIES = {
    "Example Query 1": "Who visited microsoft.com on September 18?",
    "Example Query 2": "Does Kate has drive ?",
    "Example Query 3": "What phone number can be used to contact David Johnson?",
}
def get_batch_text_representation(texts, model, tokenizer, batch_size=1):
    """
    Get mean-pooled representations of given texts in batches.

    Each batch is tokenized with padding and truncation, passed through
    `model` without gradient tracking, and the last hidden states are averaged
    over the positions selected by the attention mask.

    Args:
        texts (list[str]): Texts to embed.
        model: Callable that accepts the tokenizer output as keyword arguments
            (plus `output_hidden_states`) and returns an object exposing a
            `last_hidden_state` tensor. Presumably a Hugging Face transformers
            encoder -- confirm against the caller.
        tokenizer: Callable that accepts a list of texts with
            `return_tensors`, `padding` and `truncation` keyword arguments and
            returns a mapping containing an `attention_mask` tensor.
        batch_size (int): Number of texts per forward pass. Defaults to 1.

    Returns:
        np.ndarray: One pooled vector per input text, in input order. An empty
        array is returned when `texts` is empty.
    """
    mean_pooled_batch = []
    for start in range(0, len(texts), batch_size):
        batch_texts = texts[start:start + batch_size]
        inputs = tokenizer(batch_texts, return_tensors="pt", padding=True, truncation=True)
        with torch.no_grad():
            outputs = model(**inputs, output_hidden_states=False)
            last_hidden_states = outputs.last_hidden_state
            # Broadcast the mask across the hidden dimension so that masked
            # (padding) positions contribute nothing to the sum.
            input_mask_expanded = (
                inputs["attention_mask"]
                .unsqueeze(-1)
                .expand(last_hidden_states.size())
                .float()
            )
            sum_embeddings = torch.sum(last_hidden_states * input_mask_expanded, 1)
            # A row whose mask is all zeros would otherwise divide 0 by 0 and
            # produce NaN; clamping leaves every non-empty row unchanged.
            sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
            mean_pooled = sum_embeddings / sum_mask
        mean_pooled_batch.extend(mean_pooled.cpu().detach().numpy())
    return np.array(mean_pooled_batch)
def is_user_query_valid(user_query: str) -> bool:
    """
    Report whether `user_query` is a non-default query that is missing or too long.

    NOTE(review): despite the function name, True is returned for queries that
    fail the checks below, not for acceptable ones. Behaviour is kept as-is
    because callers may rely on it -- confirm before renaming or inverting.

    Args:
        user_query (str): The input text to be checked; may be None.

    Returns:
        bool: True if `user_query` is not one of the `DEFAULT_QUERIES` values
        and is either None or longer than `MAX_USER_QUERY_LEN` characters.
        False otherwise, including for the empty string.
    """
    # The built-in example queries are never flagged, whatever their length.
    if user_query in DEFAULT_QUERIES.values():
        return False
    # A missing query has no length and is treated as outside the limit.
    if user_query is None:
        return True
    return len(user_query) > MAX_USER_QUERY_LEN