| from __future__ import annotations |
|
|
| import json |
| import os |
| import re |
| import shutil |
| import stat |
| import tempfile |
| import time |
| import uuid |
| import zipfile |
| from dataclasses import asdict, dataclass |
| from pathlib import Path, PurePosixPath |
| from typing import Any |
|
|
# Known benchmark domains (informational; actual domain tokens are validated
# against DOMAIN_TOKEN_RE rather than this closed list).
DOMAINS = (
    'Astronomy',
    'Chemistry',
    'Earth',
    'Energy',
    'Information',
    'Life',
    'Material',
    'Math',
    'Neuroscience',
    'Physics',
)
# A normalized domain token: leading letter, then letters/digits/hyphens only.
DOMAIN_TOKEN_RE = re.compile(r'^[A-Za-z][A-Za-z0-9-]*$')
# Task directory names look like '<Domain>_<NNN>' (three-digit index), e.g. 'Math_001'.
TASK_ID_RE = re.compile(r'^([A-Za-z][A-Za-z0-9-]*)_(\d{3})$')
# Substrings that betray leftover references to the original curation environment;
# their presence in descriptions or JSON text is flagged as an error.
STALE_TOKENS = (
    '/mnt/shared-storage-user/',
    'SGI-EvalAgent',
    'prior_literature',
    'target_literature',
)
# Path references into a legacy 'tasks/' tree ('./tasks/...' or '/tasks/...').
STALE_TASK_REF_RE = re.compile(r'(?:\./|/)tasks/[\w/.\-]+')
# './data/...' references embedded in free text (stops at quotes/punctuation).
DATA_REF_RE = re.compile(r"""\./data/[^'"`\n;,]+""")
# Required exact layout of a task directory and of its JSON payloads.
EXPECTED_TOP_LEVEL = {'data', 'related_work', 'target_study', 'task_info.json'}
# Sorted target_study entries with directories suffixed '/' (see _target_entries).
EXPECTED_TARGET_STUDY = ('checklist.json', 'images/', 'paper.pdf')
EXPECTED_TASK_INFO_KEYS = ('data', 'task')
EXPECTED_DATA_ITEM_KEYS = ('description', 'name', 'path', 'type')
EXPECTED_CHECKLIST_ITEM_KEYS = ('content', 'keywords', 'path', 'type', 'weight')
# macOS packaging junk silently skipped during extraction and top-level scans.
IGNORED_ARCHIVE_PARTS = {'__MACOSX'}
IGNORED_ARCHIVE_NAMES = {'.DS_Store'}
# Extraction limits; each may be overridden through an environment variable.
DEFAULT_MAX_FILES = int(os.environ.get('RCB_SPACE_MAX_FILES', '5000'))
DEFAULT_MAX_TOTAL_BYTES = int(os.environ.get('RCB_SPACE_MAX_TOTAL_BYTES', str(5 * 1024 * 1024 * 1024)))
DEFAULT_MAX_SINGLE_FILE_BYTES = int(os.environ.get('RCB_SPACE_MAX_SINGLE_FILE_BYTES', str(1024 * 1024 * 1024)))
# Name prefixes marking temp artifacts this module owns (and may delete later).
WORK_DIR_PREFIX = 'rcb_space_submit_'
ARCHIVE_PREFIX = 'rcb_space_upload_'
# Resolved temp roots under which managed upload files are allowed to live.
TEMP_ROOTS = tuple(
    Path(root).resolve()
    for root in {
        tempfile.gettempdir(),
        os.environ.get('GRADIO_TEMP_DIR', ''),
    }
    if root
)
|
|
|
|
@dataclass
class SubmissionMetadata:
    """Submitter-provided metadata accompanying one task upload."""

    domain: str  # Benchmark domain label; normalized/validated separately.
    submitter: str  # Submitter name or HF username (see validate_submission_metadata).
    email: str  # Contact email address.
    paper_title: str  # Title of the target paper.
    paper_url: str  # Paper URL or DOI.
    notes: str = ''  # Optional free-form notes.
|
|
|
|
@dataclass
class ArchiveStats:
    """Size summary of an accepted upload archive (declared zip metadata)."""

    file_count: int  # Number of file entries (directories excluded).
    total_bytes: int  # Sum of declared uncompressed file sizes.
|
|
|
|
@dataclass
class PreparedSubmission:
    """A validated, staged submission plus the bookkeeping needed to publish it."""

    work_dir: str
    uploaded_task_dir: str
    staged_task_dir: str
    assigned_task_id: str
    archive_stats: ArchiveStats
    metadata: SubmissionMetadata

    def to_state(self) -> dict[str, Any]:
        """Serialize to plain JSON-safe types for round-tripping through UI state."""
        # asdict() recurses into the nested dataclasses, yielding the same
        # {'archive_stats': {...}, 'metadata': {...}} shape as a manual dict.
        return asdict(self)

    @classmethod
    def from_state(cls, state: dict[str, Any]) -> 'PreparedSubmission':
        """Rebuild a PreparedSubmission from a to_state() dictionary."""
        stats = ArchiveStats(**state['archive_stats'])
        meta = SubmissionMetadata(**state['metadata'])
        return cls(
            work_dir=state['work_dir'],
            uploaded_task_dir=state['uploaded_task_dir'],
            staged_task_dir=state['staged_task_dir'],
            assigned_task_id=state['assigned_task_id'],
            archive_stats=stats,
            metadata=meta,
        )
|
|
|
|
class ValidationError(RuntimeError):
    """Raised when an uploaded submission fails a validation or safety check."""

    pass
|
|
|
|
def normalize_domain_token(domain: str) -> str:
    """Collapse whitespace/underscores into single hyphens and trim edge hyphens.

    Falsy input (``''`` or ``None``) normalizes to the empty string.
    """
    token = (domain or '').strip()
    token = re.sub(r'[\s_]+', '-', token)
    token = re.sub(r'-{2,}', '-', token)
    return token.strip('-')
|
|
|
|
def load_json(path: Path) -> Any:
    """Read *path* as UTF-8 JSON, wrapping any failure in ValidationError."""
    try:
        raw = path.read_text(encoding='utf-8')
        return json.loads(raw)
    except Exception as exc:
        raise ValidationError(f'Failed to parse JSON: {path}: {exc}') from exc
|
|
|
|
def rel(path: Path, base: Path) -> str:
    """Render *path* relative to *base* for display; fall back to *path* itself.

    ``PurePath.relative_to`` raises ValueError when *path* is not under *base*,
    so catch exactly that instead of a blanket ``Exception`` (which could mask
    unrelated bugs such as a wrong argument type).
    """
    try:
        return str(path.relative_to(base))
    except ValueError:
        return str(path)
|
|
|
|
| def _target_entries(target_dir: Path) -> tuple[str, ...]: |
| return tuple(sorted(x.name + ('/' if x.is_dir() else '') for x in target_dir.iterdir())) |
|
|
|
|
def _is_ignored_archive_path(path: PurePosixPath) -> bool:
    """True for macOS packaging junk that extraction should silently skip."""
    if path.name in IGNORED_ARCHIVE_NAMES or path.name.startswith('._'):
        return True
    return any(part in IGNORED_ARCHIVE_PARTS for part in path.parts)
|
|
|
|
| def _is_zip_symlink(info: zipfile.ZipInfo) -> bool: |
| mode = info.external_attr >> 16 |
| return stat.S_ISLNK(mode) |
|
|
|
|
def _iter_data_refs(text: str) -> list[str]:
    """Unique './data/...' references in *text*, first-seen order, trailing dots stripped."""
    cleaned = (match.rstrip('.') for match in DATA_REF_RE.findall(text))
    # dict.fromkeys dedupes while preserving insertion order.
    return list(dict.fromkeys(cleaned))
|
|
|
|
| def _iter_visible_files(root: Path) -> set[Path]: |
| return { |
| path for path in root.rglob('*') |
| if path.is_file() and not any(part.startswith('.') for part in path.relative_to(root).parts) |
| } |
|
|
|
|
def cleanup_work_dir(work_dir: str | Path | None) -> None:
    """Best-effort removal of a managed work directory; no-op for falsy input."""
    if work_dir:
        shutil.rmtree(Path(work_dir), ignore_errors=True)
|
|
|
|
def cleanup_submission_state(state: dict[str, Any] | None) -> None:
    """Remove the work directory referenced by a stored submission-state dict."""
    if isinstance(state, dict) and state:
        cleanup_work_dir(state.get('work_dir'))
|
|
|
|
def _is_under_temp_root(path: Path) -> bool:
    """True when the resolved *path* equals or lies beneath a managed temp root."""
    try:
        resolved = path.resolve()
    except OSError:
        return False
    for root in TEMP_ROOTS:
        if root == resolved or root in resolved.parents:
            return True
    return False
|
|
|
|
def cleanup_uploaded_archive(archive_path: str | Path | None) -> None:
    """Delete a managed uploaded archive copy, refusing anything we don't own.

    Only removes a regular '.zip' file that carries the managed ARCHIVE_PREFIX
    name and lives under a known temp root; everything else is left untouched.
    Deletion failures are swallowed (best effort).
    """
    if not archive_path:
        return
    candidate = Path(archive_path)
    removable = (
        candidate.is_file()
        and candidate.suffix.lower() == '.zip'
        and candidate.name.startswith(ARCHIVE_PREFIX)
        and _is_under_temp_root(candidate)
    )
    if not removable:
        return
    try:
        candidate.unlink()
    except OSError:
        pass
|
|
|
|
def persist_uploaded_archive(archive_path: str | Path) -> Path:
    """Copy an uploaded zip into the managed temp area under a unique name.

    Returns the managed copy's path. Raises ValidationError when the source is
    missing or is not a '.zip' file.
    """
    source = Path(archive_path)
    if not (source.exists() and source.is_file()):
        raise ValidationError(f'Uploaded archive does not exist: {source}')
    if source.suffix.lower() != '.zip':
        raise ValidationError('Only .zip uploads are supported.')

    destination = Path(tempfile.gettempdir()) / f'{ARCHIVE_PREFIX}{uuid.uuid4().hex}.zip'
    shutil.copy2(source, destination)
    return destination
|
|
|
|
def cleanup_stale_managed_files(max_age_seconds: int) -> int:
    """Remove managed temp artifacts older than *max_age_seconds*.

    Scans the system temp directory for work directories (WORK_DIR_PREFIX) and
    archive copies (ARCHIVE_PREFIX + '.zip') whose mtime is at or past the
    cutoff. Returns the number of entries processed for removal; directory
    removals are best-effort (rmtree with ignore_errors) while file removals
    that raise OSError are skipped and not counted.
    """
    if max_age_seconds <= 0:
        return 0

    temp_root = Path(tempfile.gettempdir())
    if not temp_root.exists():
        return 0

    cutoff = time.time() - max_age_seconds
    removed = 0
    for entry in temp_root.iterdir():
        # Decide whether this entry is one of ours.
        if entry.is_dir():
            managed = entry.name.startswith(WORK_DIR_PREFIX)
        elif entry.is_file():
            managed = entry.name.startswith(ARCHIVE_PREFIX) and entry.suffix.lower() == '.zip'
        else:
            managed = False
        if not managed:
            continue

        try:
            mtime = entry.stat().st_mtime
        except OSError:
            continue
        if mtime > cutoff:
            continue  # still fresh

        if entry.is_dir():
            shutil.rmtree(entry, ignore_errors=True)
        else:
            try:
                entry.unlink()
            except OSError:
                continue
        removed += 1
    return removed
|
|
|
|
def create_work_dir() -> Path:
    """Create and return a fresh temp working directory with the managed prefix."""
    return Path(tempfile.mkdtemp(prefix=WORK_DIR_PREFIX))
|
|
|
|
def extract_submission_zip(
    archive_path: str | Path,
    work_dir: str | Path,
    *,
    max_files: int = DEFAULT_MAX_FILES,
    max_total_bytes: int = DEFAULT_MAX_TOTAL_BYTES,
    max_single_file_bytes: int = DEFAULT_MAX_SINGLE_FILE_BYTES,
) -> tuple[Path, ArchiveStats]:
    """Safely extract an uploaded zip into ``<work_dir>/extracted``.

    Rejects absolute paths, ``..`` traversal, symlinks, empty archives, and
    archives exceeding the file-count / size limits. Size limits are enforced
    both against the sizes declared in the zip directory AND against the bytes
    actually decompressed, because the declared metadata is attacker-controlled
    (a zip bomb can understate its uncompressed size).

    Returns:
        ``(extract_root, ArchiveStats)`` where the stats reflect the declared
        file count and total size of the accepted entries.

    Raises:
        ValidationError: On any unsafe, malformed, or over-limit archive.
    """
    archive_path = Path(archive_path)
    work_dir = Path(work_dir)
    extract_root = work_dir / 'extracted'
    extract_root.mkdir(parents=True, exist_ok=True)

    if archive_path.suffix.lower() != '.zip':
        raise ValidationError('Only .zip uploads are supported.')

    file_count = 0
    total_bytes = 0
    safe_infos: list[tuple[zipfile.ZipInfo, PurePosixPath]] = []

    with zipfile.ZipFile(archive_path) as zf:
        infos = zf.infolist()
        if not infos:
            raise ValidationError('The uploaded zip archive is empty.')

        # First pass: validate every entry against its declared metadata.
        for info in infos:
            raw_name = info.filename.replace('\\', '/')
            if not raw_name:
                continue
            posix_path = PurePosixPath(raw_name)
            if _is_ignored_archive_path(posix_path):
                continue
            if posix_path.is_absolute() or '..' in posix_path.parts:
                raise ValidationError(f'Archive contains an invalid path: {raw_name}')
            if _is_zip_symlink(info):
                raise ValidationError(f'Archive contains a symbolic link, which is not allowed: {raw_name}')
            safe_infos.append((info, posix_path))
            if info.is_dir():
                continue
            file_count += 1
            total_bytes += info.file_size
            if info.file_size > max_single_file_bytes:
                raise ValidationError(
                    f'Archive file exceeds the per-file limit ({max_single_file_bytes} bytes): {raw_name}'
                )
            if file_count > max_files:
                raise ValidationError(f'Archive exceeds the file-count limit ({max_files}).')
            if total_bytes > max_total_bytes:
                raise ValidationError(f'Archive exceeds the total-size limit ({max_total_bytes} bytes).')

        if file_count == 0:
            raise ValidationError('The uploaded zip archive does not contain any files.')

        # Second pass: extract, re-checking limits against the bytes actually
        # decompressed (the central directory's declared sizes can lie).
        extracted_total = 0
        for info, posix_path in safe_infos:
            destination = extract_root.joinpath(*posix_path.parts)
            if info.is_dir():
                destination.mkdir(parents=True, exist_ok=True)
                continue
            destination.parent.mkdir(parents=True, exist_ok=True)
            written = 0
            with zf.open(info) as src, destination.open('wb') as dst:
                while chunk := src.read(1024 * 1024):
                    written += len(chunk)
                    extracted_total += len(chunk)
                    if written > max_single_file_bytes:
                        raise ValidationError(
                            f'Archive file exceeds the per-file limit ({max_single_file_bytes} bytes): {info.filename}'
                        )
                    if extracted_total > max_total_bytes:
                        raise ValidationError(f'Archive exceeds the total-size limit ({max_total_bytes} bytes).')
                    dst.write(chunk)

    return extract_root, ArchiveStats(file_count=file_count, total_bytes=total_bytes)
|
|
|
|
def find_single_task_dir(extract_root: str | Path) -> Path:
    """Return the single top-level task directory inside *extract_root*.

    Ignores macOS packaging junk. Raises ValidationError unless exactly one
    non-ignored entry exists and it is a directory.
    """
    root = Path(extract_root)
    candidates = [
        entry
        for entry in sorted(root.iterdir(), key=lambda p: p.name.lower())
        if entry.name not in IGNORED_ARCHIVE_NAMES
        and entry.name not in IGNORED_ARCHIVE_PARTS
        and not entry.name.startswith('._')
    ]

    if len(candidates) == 1 and candidates[0].is_dir():
        return candidates[0]

    names = [entry.name for entry in candidates]
    raise ValidationError(
        'Zip must contain exactly one top-level task directory. '
        f'Found: {names if names else "(none)"}'
    )
|
|
|
|
def validate_submission_metadata(metadata: SubmissionMetadata) -> list[str]:
    """Check submitter-supplied metadata; return a list of problems (empty = valid)."""
    problems: list[str] = []

    token = normalize_domain_token(metadata.domain)
    if not token:
        problems.append('A domain is required.')
    elif not DOMAIN_TOKEN_RE.fullmatch(token):
        problems.append(
            'Domain must start with a letter and contain only letters, numbers, or hyphens '
            f'after normalization. Got: {metadata.domain!r}'
        )

    if not metadata.submitter.strip():
        problems.append('Submitter name or HF username is required.')

    contact = metadata.email.strip()
    if not contact:
        problems.append('Contact email is required.')
    elif not re.fullmatch(r'[^@\s]+@[^@\s]+\.[^@\s]+', contact):
        problems.append('Contact email must be a valid email address.')

    if not metadata.paper_title.strip():
        problems.append('Paper title is required.')
    if not metadata.paper_url.strip():
        problems.append('Paper URL or DOI is required.')
    return problems
|
|
|
|
def validate_task_dir(
    task_dir: str | Path,
    *,
    enforce_task_name: bool = True,
    expected_domain: str | None = None,
) -> list[str]:
    """Validate the layout and metadata of one task directory.

    Returns a list of human-readable problems; an empty list means the task
    passed every check. Most problems are accumulated and reported together,
    but a missing or unparseable ``task_info.json`` / ``checklist.json``
    aborts early with the errors gathered so far, since later checks depend
    on those files.

    Args:
        task_dir: Candidate task directory.
        enforce_task_name: Require the directory name to match TASK_ID_RE
            (``<Domain>_<NNN>``).
        expected_domain: If set, the domain embedded in the directory name
            must equal this value whenever the name parses.
    """
    task_dir = Path(task_dir)
    errors: list[str] = []
    task_name = task_dir.name
    match = TASK_ID_RE.match(task_name)

    if enforce_task_name:
        if not match:
            errors.append(f'invalid task directory name: {task_name}')
        elif expected_domain and match.group(1) != expected_domain:
            errors.append(f'task directory domain {match.group(1)!r} does not match selected domain {expected_domain!r}')
    elif expected_domain and match and match.group(1) != expected_domain:
        # Naming is not enforced, but if the name happens to parse we still
        # cross-check its domain against the one the submitter selected.
        errors.append(f'task directory domain {match.group(1)!r} does not match selected domain {expected_domain!r}')

    if not task_dir.is_dir():
        return [f'task directory does not exist: {task_dir}']

    # Top-level layout must match exactly (no extra or missing entries).
    actual_top = {p.name for p in task_dir.iterdir()}
    if actual_top != EXPECTED_TOP_LEVEL:
        errors.append(f'top-level entries mismatch: expected {sorted(EXPECTED_TOP_LEVEL)}, got {sorted(actual_top)}')

    data_dir = task_dir / 'data'
    related_dir = task_dir / 'related_work'
    target_dir = task_dir / 'target_study'
    task_info_path = task_dir / 'task_info.json'
    checklist_path = target_dir / 'checklist.json'
    paper_path = target_dir / 'paper.pdf'
    images_dir = target_dir / 'images'

    if not data_dir.is_dir():
        errors.append('missing data/ directory')
    if not related_dir.is_dir():
        errors.append('missing related_work/ directory')
    if not target_dir.is_dir():
        errors.append('missing target_study/ directory')
    if not task_info_path.is_file():
        errors.append('missing task_info.json')
        return errors
    if not checklist_path.is_file():
        errors.append('missing target_study/checklist.json')
        return errors
    if not paper_path.is_file():
        errors.append('missing target_study/paper.pdf')
    if not images_dir.is_dir():
        errors.append('missing target_study/images/ directory')

    try:
        task_info = load_json(task_info_path)
    except ValidationError as exc:
        errors.append(str(exc))
        return errors

    if not isinstance(task_info, dict):
        errors.append('task_info.json root must be a JSON object')
        return errors

    if tuple(sorted(task_info.keys())) != EXPECTED_TASK_INFO_KEYS:
        errors.append(f'task_info.json keys mismatch: {sorted(task_info.keys())}')

    if not isinstance(task_info.get('task'), str) or not task_info['task'].strip():
        errors.append('task_info.json field `task` must be a non-empty string')
    if not isinstance(task_info.get('data'), list):
        errors.append('task_info.json field `data` must be a list')
        # Replace the malformed value so the loops below can still run.
        task_info['data'] = []

    declared_paths: set[str] = set()
    for idx, item in enumerate(task_info['data']):
        prefix = f'task_info.data[{idx}]'
        if not isinstance(item, dict):
            errors.append(f'{prefix} must be an object')
            continue
        if tuple(sorted(item.keys())) != EXPECTED_DATA_ITEM_KEYS:
            errors.append(f'{prefix} keys mismatch: {sorted(item.keys())}')
            continue
        for field in EXPECTED_DATA_ITEM_KEYS:
            if not isinstance(item.get(field), str) or not item[field].strip():
                errors.append(f'{prefix}.{field} must be a non-empty string')
        data_path = item.get('path')
        if not isinstance(data_path, str):
            continue
        if not data_path.startswith('./data/') and data_path != './data':
            errors.append(f'{prefix}.path must start with ./data/: {data_path}')
            continue
        if '\\' in data_path or '..' in Path(data_path).parts:
            errors.append(f'{prefix}.path contains an invalid segment: {data_path}')
            continue
        if data_path in declared_paths:
            errors.append(f'duplicate data path declaration: {data_path}')
            continue
        declared_paths.add(data_path)
        rel_path = data_path[2:] if data_path.startswith('./') else data_path
        target = task_dir / rel_path
        if not target.exists():
            errors.append(f'{prefix}.path does not exist: {data_path}')
            continue
        if target.is_dir():
            nested_files = {p for p in target.rglob('*') if p.is_file()}
            if not nested_files:
                errors.append(f'{prefix}.path points to an empty directory: {data_path}')
        elif not target.is_file():
            errors.append(f'{prefix}.path is neither file nor directory: {data_path}')
        description = item.get('description', '')
        # Guard: a non-string description was already reported above; the raw
        # substring scan would raise TypeError on it.
        if isinstance(description, str) and any(token in description for token in STALE_TOKENS):
            errors.append(f'{prefix}.description still contains stale source paths or legacy directories')

    related_entries = sorted(related_dir.iterdir(), key=lambda p: p.name) if related_dir.exists() else []
    related_files = [p for p in related_entries if p.is_file()]
    related_dirs = [p for p in related_entries if p.is_dir()]
    if related_dirs:
        errors.append('related_work/ must not contain subdirectories')
    if not related_files:
        errors.append('related_work/ must contain at least one PDF')
    pdf_names = []
    for path in related_files:
        if not re.fullmatch(r'paper_\d{3}\.pdf', path.name):
            errors.append(f'invalid related_work filename: {path.name}')
        pdf_names.append(path.name)
    expected_pdf_names = [f'paper_{i:03d}.pdf' for i in range(len(pdf_names))]
    if pdf_names and pdf_names != expected_pdf_names:
        errors.append(f'related_work PDFs must be contiguous starting from paper_000.pdf; got {pdf_names}')

    if target_dir.exists() and _target_entries(target_dir) != EXPECTED_TARGET_STUDY:
        errors.append(f'target_study entries mismatch: {_target_entries(target_dir)}')

    try:
        checklist = load_json(checklist_path)
    except ValidationError as exc:
        errors.append(str(exc))
        return errors

    if not isinstance(checklist, list):
        errors.append('checklist.json root must be a list')
        return errors

    if not checklist:
        errors.append('checklist.json must be a non-empty list')
        checklist = []

    referenced_images: set[str] = set()
    for idx, item in enumerate(checklist):
        prefix = f'checklist[{idx}]'
        if not isinstance(item, dict):
            errors.append(f'{prefix} must be an object')
            continue
        if tuple(sorted(item.keys())) != EXPECTED_CHECKLIST_ITEM_KEYS:
            errors.append(f'{prefix} keys mismatch: {sorted(item.keys())}')
            continue
        item_type = item.get('type')
        if item_type not in {'text', 'image'}:
            errors.append(f'{prefix}.type must be text or image, got {item_type!r}')
        if not isinstance(item.get('content'), str) or not item['content'].strip():
            errors.append(f'{prefix}.content must be a non-empty string')
        if not isinstance(item.get('keywords'), list) or not item['keywords']:
            errors.append(f'{prefix}.keywords must be a non-empty list')
        elif not all(isinstance(x, str) and x.strip() for x in item['keywords']):
            errors.append(f'{prefix}.keywords must contain only non-empty strings')
        if not isinstance(item.get('weight'), (int, float)) or item['weight'] <= 0:
            errors.append(f'{prefix}.weight must be a positive number')
        path_value = item.get('path')
        if item_type == 'text':
            if path_value is not None:
                errors.append(f'{prefix}.path must be null for text items')
        elif item_type == 'image':
            if not isinstance(path_value, str) or not path_value.startswith('images/'):
                errors.append(f'{prefix}.path must start with images/ for image items')
            else:
                if '\\' in path_value or '..' in Path(path_value).parts:
                    errors.append(f'{prefix}.path contains an invalid segment: {path_value}')
                image_path = target_dir / path_value
                if not image_path.is_file():
                    errors.append(f'{prefix}.path does not exist: {path_value}')
                referenced_images.add(path_value)

    # Cross-check the referenced images against what actually exists on disk.
    actual_image_files = {str(p.relative_to(target_dir)) for p in _iter_visible_files(images_dir)} if images_dir.exists() else set()
    missing_images = sorted(referenced_images - actual_image_files)
    if missing_images:
        errors.append('checklist image references are missing from target_study/images: ' + ', '.join(missing_images[:20]))

    # Raw-text scan of both JSON files for leftover curation-environment paths.
    for text_path in (task_info_path, checklist_path):
        text = text_path.read_text(encoding='utf-8')
        if any(token in text for token in STALE_TOKENS):
            errors.append(f'stale source path tokens remain in {rel(text_path, task_dir)}')

    # task_info is guaranteed to be a dict here (non-dict returned early above).
    task_text = task_info.get('task', '')
    if isinstance(task_text, str):
        for ref in STALE_TASK_REF_RE.findall(task_text):
            errors.append(f'task description contains stale path: {ref}')

    for idx, item in enumerate(task_info.get('data', [])):
        # Malformed items were already reported in the structural pass above;
        # skip them here instead of crashing on .get() / regex over a non-string.
        if not isinstance(item, dict):
            continue
        desc = item.get('description', '')
        if not isinstance(desc, str):
            continue
        for ref in STALE_TASK_REF_RE.findall(desc):
            errors.append(f'task_info.data[{idx}].description contains stale path: {ref}')
        for data_ref in _iter_data_refs(desc):
            rel_ref = data_ref[2:]
            if not (task_dir / rel_ref).exists():
                errors.append(f'task_info.data[{idx}].description references non-existent path: {data_ref}')

    return errors
|
|
|
|
def stage_submission(task_dir: str | Path, assigned_task_id: str, work_dir: str | Path) -> Path:
    """Copy the uploaded task into ``<work_dir>/staged/<assigned_task_id>``.

    An existing staged copy for the same id is replaced. Returns the staged
    directory's path.
    """
    source = Path(task_dir)
    staging_root = Path(work_dir) / 'staged'
    staging_root.mkdir(parents=True, exist_ok=True)
    destination = staging_root / assigned_task_id
    if destination.exists():
        shutil.rmtree(destination)
    shutil.copytree(source, destination)
    return destination
|
|
|
|
def build_public_report(prepared: PreparedSubmission) -> dict[str, Any]:
    """Build the user-facing success summary (omits email and notes)."""
    stats = prepared.archive_stats
    meta = prepared.metadata
    return {
        'status': 'ok',
        'assigned_task_id': prepared.assigned_task_id,
        'archive': {
            'file_count': stats.file_count,
            'total_bytes': stats.total_bytes,
        },
        'metadata': {
            'domain': meta.domain,
            'submitter': meta.submitter,
            'paper_title': meta.paper_title,
            'paper_url': meta.paper_url,
        },
    }
|
|
|
|
def validate_and_prepare_submission(
    archive_path: str | Path,
    metadata: SubmissionMetadata,
    assigned_task_id: str,
) -> PreparedSubmission:
    """Run the full pipeline: metadata check, safe extract, validate, stage.

    On any failure the freshly created work directory is removed before the
    exception propagates; on success the caller owns the returned
    PreparedSubmission's work_dir and is responsible for cleaning it up.

    Raises:
        ValidationError: When metadata or the task directory fails validation.
    """
    metadata_problems = validate_submission_metadata(metadata)
    if metadata_problems:
        raise ValidationError('\n'.join(metadata_problems))

    work_dir = create_work_dir()
    try:
        extract_root, stats = extract_submission_zip(archive_path, work_dir)
        task_dir = find_single_task_dir(extract_root)
        task_problems = validate_task_dir(task_dir, enforce_task_name=False, expected_domain=metadata.domain)
        if task_problems:
            raise ValidationError('\n'.join(task_problems))
        staged = stage_submission(task_dir, assigned_task_id, work_dir)
        return PreparedSubmission(
            work_dir=str(work_dir),
            uploaded_task_dir=str(task_dir),
            staged_task_dir=str(staged),
            assigned_task_id=assigned_task_id,
            archive_stats=stats,
            metadata=metadata,
        )
    except Exception:
        # Never leak a partially-built work directory.
        cleanup_work_dir(work_dir)
        raise
|
|