gpu_monitoring_system / web_interface.py
meccatronis's picture
Upload web_interface.py with huggingface_hub
8d4e0c1 verified
#!/usr/bin/env python3
"""
Web Interface for GPU Monitoring
Provides a web-based dashboard for remote GPU monitoring with real-time charts,
historical data, and fan control capabilities.
"""
from flask import Flask, render_template, jsonify, request, redirect, url_for
from flask_cors import CORS
import json
import time
import logging
from datetime import datetime, timedelta
from typing import Dict, List, Any, Optional
from gpu_monitoring import GPUManager, GPUStatus
from gpu_fan_controller import FanController, FanMode, ProfileType
# Flask application object; every route and error handler below registers on it.
app = Flask(__name__)
# Allow all origins for local development.
# NOTE(review): this also opens the fan-control POST endpoints to any origin.
CORS(app, origins="*")

# Module-level logger used by the route handlers for error reporting.
logger = logging.getLogger(__name__)
class WebGPUManager:
    """Bridge between the Flask routes and the GPU / fan back ends.

    Owns a ``GPUManager`` and a ``FanController``, initializes both on
    construction, and repackages their data as dictionaries and lists for
    the route handlers.
    """

    # Fallback web settings used when ``config/monitoring.json`` is missing
    # or cannot be used.
    DEFAULT_CONFIG = {
        'enabled': True,
        'host': '0.0.0.0',
        'port': 5000,
        'debug': False,
    }

    def __init__(self):
        """Create the back-end objects, load the web config and initialize."""
        self.gpu_manager = GPUManager()
        self.fan_controller = FanController()
        self.config = self.load_config()
        # Initialize components
        self.gpu_manager.initialize()
        self.fan_controller.initialize()

    def load_config(self) -> Dict:
        """Load the web interface configuration.

        Reads the ``web`` section of ``config/monitoring.json`` (relative to
        the current working directory).

        Returns:
            The ``web`` section, ``{}`` if the file has no such section, or a
            copy of ``DEFAULT_CONFIG`` when the file is missing, unreadable,
            not valid JSON, or not shaped as expected.
        """
        try:
            with open('config/monitoring.json', 'r', encoding='utf-8') as f:
                config = json.load(f)
        except FileNotFoundError:
            # No config file is the normal "not configured" case: stay quiet.
            return dict(self.DEFAULT_CONFIG)
        except (OSError, ValueError) as exc:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            logger.warning("Could not read config/monitoring.json: %s", exc)
            return dict(self.DEFAULT_CONFIG)

        if not isinstance(config, dict):
            logger.warning("config/monitoring.json is not a JSON object; using defaults")
            return dict(self.DEFAULT_CONFIG)

        web_config = config.get('web', {})
        if not isinstance(web_config, dict):
            logger.warning("'web' section of config/monitoring.json is not an object; using defaults")
            return dict(self.DEFAULT_CONFIG)
        return web_config

    def get_current_status(self) -> Dict[str, Any]:
        """Get current GPU status.

        Returns:
            A dict with ``timestamp`` (``time.time()``), ``gpus`` (one entry
            per GPU whose status object is truthy) and ``fan_control``. When
            the fan controller reports no status, ``fan_control`` carries
            ``'unknown'`` / zero placeholders.
        """
        status_dict = self.gpu_manager.get_status()
        fan_status = self.fan_controller.get_status()
        result = {
            'timestamp': time.time(),
            'gpus': {},
            'fan_control': {
                'mode': fan_status.mode.value if fan_status else 'unknown',
                'profile': fan_status.profile if fan_status else 'unknown',
                'current_pwm': fan_status.current_pwm if fan_status else 0,
                'temperature': fan_status.temperature if fan_status else 0.0
            }
        }
        for gpu_name, gpu_status in status_dict.items():
            # GPUs without a status reading are left out of the response.
            if gpu_status:
                result['gpus'][gpu_name] = {
                    'temperature': gpu_status.temperature,
                    'load': gpu_status.load,
                    'fan_speed': gpu_status.fan_speed,
                    'fan_pwm': gpu_status.fan_pwm,
                    'power_draw': gpu_status.power_draw,
                    'memory_used': gpu_status.memory_used,
                    'memory_total': gpu_status.memory_total,
                    'core_clock': gpu_status.core_clock,
                    'memory_clock': gpu_status.memory_clock,
                    'voltage': gpu_status.voltage,
                    'efficiency': gpu_status.efficiency
                }
        return result

    def get_historical_data(self, gpu_name: str, hours: int = 24) -> List[Dict[str, Any]]:
        """Get historical data for a GPU (delegates to the GPU manager)."""
        return self.gpu_manager.get_historical_data(gpu_name, hours)

    def get_gpu_list(self) -> List[str]:
        """Get list of available GPUs (delegates to the GPU manager)."""
        return self.gpu_manager.get_gpu_list()

    def get_fan_profiles(self) -> Dict[str, Any]:
        """Get available fan profiles as plain dicts keyed by profile key."""
        profiles = self.fan_controller.get_profiles()
        return {
            name: {
                'name': profile.name,
                'type': profile.profile_type.value,
                'description': profile.description,
                'curve': profile.curve,
                'safety': profile.safety,
                'enabled': profile.enabled
            }
            for name, profile in profiles.items()
        }

    def set_fan_profile(self, profile_name: str) -> bool:
        """Set fan profile; returns whatever the fan controller reports."""
        return self.fan_controller.set_profile(profile_name)

    def set_fan_mode(self, mode: str) -> bool:
        """Set fan mode.

        Args:
            mode: Value identifying a ``FanMode`` member.

        Returns:
            ``True`` if the mode was applied, ``False`` if ``mode`` is not a
            valid ``FanMode`` value or the controller raised while applying it.
        """
        try:
            fan_mode = FanMode(mode)
        except (ValueError, TypeError, KeyError):
            # presumably FanMode is an Enum, which raises ValueError for an
            # unknown value -- confirm against gpu_fan_controller.
            return False
        try:
            self.fan_controller.set_mode(fan_mode)
        except Exception:
            # Keep the original best-effort contract (report False) but leave
            # a traceback in the log instead of swallowing the failure.
            logger.exception("Failed to set fan mode to %s", mode)
            return False
        return True

    def set_manual_pwm(self, pwm: int) -> bool:
        """Set manual PWM.

        Args:
            pwm: Integer duty value in the inclusive range 0-255.

        Returns:
            ``True`` if the value was forwarded to the fan controller,
            ``False`` if it is not an integer or lies outside 0-255.
        """
        # bool is a subclass of int, so it has to be excluded explicitly;
        # other non-integers (str, None, float) are rejected rather than
        # raising TypeError in the range comparison below.
        if isinstance(pwm, bool) or not isinstance(pwm, int):
            return False
        if not 0 <= pwm <= 255:
            return False
        self.fan_controller.set_manual_pwm(pwm)
        return True
# Module-level manager shared by all route handlers below. It is constructed at
# import time, which also runs the GPU manager and fan controller initialization.
web_manager = WebGPUManager()
@app.route('/')
def index():
    """Serve the dashboard by rendering the ``index.html`` template."""
    page = render_template('index.html')
    return page
@app.route('/api/status')
def api_status():
    """Return the current GPU and fan status as JSON.

    Any exception is logged and reported as ``{'error': ...}`` with HTTP 500.
    """
    try:
        return jsonify(web_manager.get_current_status())
    except Exception as exc:
        logger.error(f"Error getting status: {exc}")
        failure = {'error': str(exc)}
        return jsonify(failure), 500
@app.route('/api/gpus')
def api_gpus():
    """Return the list of available GPUs as ``{'gpus': [...]}``.

    Any exception is logged and reported as ``{'error': ...}`` with HTTP 500.
    """
    try:
        payload = {'gpus': web_manager.get_gpu_list()}
        return jsonify(payload)
    except Exception as exc:
        logger.error(f"Error getting GPU list: {exc}")
        failure = {'error': str(exc)}
        return jsonify(failure), 500
@app.route('/api/history/<gpu_name>')
def api_history(gpu_name):
    """Return historical data for ``gpu_name`` as ``{'data': [...]}``.

    The look-back window is taken from the ``hours`` query parameter and
    falls back to 24 when the parameter is absent or not an integer.
    Any exception is logged and reported as ``{'error': ...}`` with HTTP 500.
    """
    try:
        window_hours = request.args.get('hours', default=24, type=int)
        samples = web_manager.get_historical_data(gpu_name, window_hours)
        payload = {'data': samples}
        return jsonify(payload)
    except Exception as exc:
        logger.error(f"Error getting historical data: {exc}")
        failure = {'error': str(exc)}
        return jsonify(failure), 500
@app.route('/api/fan/profiles')
def api_fan_profiles():
    """Return the available fan profiles as ``{'profiles': {...}}``.

    Any exception is logged and reported as ``{'error': ...}`` with HTTP 500.
    """
    try:
        payload = {'profiles': web_manager.get_fan_profiles()}
        return jsonify(payload)
    except Exception as exc:
        logger.error(f"Error getting fan profiles: {exc}")
        failure = {'error': str(exc)}
        return jsonify(failure), 500
@app.route('/api/fan/profile', methods=['POST'])
def api_set_fan_profile():
    """API endpoint to set fan profile.

    Expects a JSON object body with a non-empty ``profile`` entry.

    Returns:
        200 with ``{'success': True, 'message': ...}`` when the profile was
        applied; 400 when the body is not a JSON object, ``profile`` is
        missing/empty, or the manager reports failure; 500 on an unexpected
        exception.
    """
    try:
        # silent=True makes a missing or malformed JSON body come back as None
        # instead of raising an HTTP error that the generic handler below
        # would misreport as a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({'error': 'Request body must be a JSON object'}), 400
        profile_name = data.get('profile')
        if not profile_name:
            return jsonify({'error': 'Profile name required'}), 400
        success = web_manager.set_fan_profile(profile_name)
        if success:
            return jsonify({'success': True, 'message': f'Set profile to {profile_name}'})
        else:
            return jsonify({'error': f'Failed to set profile {profile_name}'}), 400
    except Exception as e:
        logger.error(f"Error setting fan profile: {e}")
        return jsonify({'error': str(e)}), 500
@app.route('/api/fan/mode', methods=['POST'])
def api_set_fan_mode():
    """API endpoint to set fan mode.

    Expects a JSON object body with a non-empty ``mode`` entry.

    Returns:
        200 with ``{'success': True, 'message': ...}`` when the mode was
        applied; 400 when the body is not a JSON object, ``mode`` is
        missing/empty, or the manager reports failure; 500 on an unexpected
        exception.
    """
    try:
        # silent=True makes a missing or malformed JSON body come back as None
        # instead of raising an HTTP error that the generic handler below
        # would misreport as a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({'error': 'Request body must be a JSON object'}), 400
        mode = data.get('mode')
        if not mode:
            return jsonify({'error': 'Mode required'}), 400
        success = web_manager.set_fan_mode(mode)
        if success:
            return jsonify({'success': True, 'message': f'Set mode to {mode}'})
        else:
            return jsonify({'error': f'Failed to set mode {mode}'}), 400
    except Exception as e:
        logger.error(f"Error setting fan mode: {e}")
        return jsonify({'error': str(e)}), 500
@app.route('/api/fan/manual', methods=['POST'])
def api_set_manual_pwm():
    """API endpoint to set manual PWM.

    Expects a JSON object body with an integer ``pwm`` entry.

    Returns:
        200 with ``{'success': True, 'message': ...}`` when the value was
        accepted; 400 when the body is not a JSON object, ``pwm`` is missing,
        is not an integer, or is rejected by the manager; 500 on an
        unexpected exception.
    """
    try:
        # silent=True makes a missing or malformed JSON body come back as None
        # instead of raising an HTTP error that the generic handler below
        # would misreport as a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({'error': 'Request body must be a JSON object'}), 400
        pwm = data.get('pwm')
        if pwm is None:
            return jsonify({'error': 'PWM value required'}), 400
        # Reject non-integers here (bool is an int subclass, so exclude it
        # explicitly): a string such as "128" would otherwise fail inside the
        # range comparison and surface as a 500 instead of a client error.
        if isinstance(pwm, bool) or not isinstance(pwm, int):
            return jsonify({'error': f'Invalid PWM value: {pwm}'}), 400
        success = web_manager.set_manual_pwm(pwm)
        if success:
            return jsonify({'success': True, 'message': f'Set manual PWM to {pwm}'})
        else:
            return jsonify({'error': f'Invalid PWM value: {pwm}'}), 400
    except Exception as e:
        logger.error(f"Error setting manual PWM: {e}")
        return jsonify({'error': str(e)}), 500
@app.route('/api/alerts')
def api_alerts():
    """Return recent alerts as ``{'alerts': [...]}``.

    Alert retrieval is not implemented yet, so the list is always empty.
    Any exception is logged and reported as ``{'error': ...}`` with HTTP 500.
    """
    try:
        # Placeholder: reading recent alerts from the database would need to
        # be implemented in the GPUDataManager first.
        recent_alerts = []
        payload = {'alerts': recent_alerts}
        return jsonify(payload)
    except Exception as exc:
        logger.error(f"Error getting alerts: {exc}")
        failure = {'error': str(exc)}
        return jsonify(failure), 500
@app.route('/api/system')
def api_system():
    """API endpoint for system information.

    Reports CPU count and utilisation, memory and root-filesystem usage
    (sizes in whole GiB) and uptime in seconds.

    Note: ``psutil.cpu_percent(interval=1)`` blocks for one second, so every
    request to this endpoint takes at least that long.

    Any exception (including a missing ``psutil``) is logged and reported as
    ``{'error': ...}`` with HTTP 500.
    """
    try:
        import psutil
        gib = 1024 ** 3
        cpu_count = psutil.cpu_count()
        cpu_percent = psutil.cpu_percent(interval=1)
        # Sample memory and disk once each so the reported fields describe a
        # single consistent snapshot instead of several separate readings.
        memory = psutil.virtual_memory()
        disk = psutil.disk_usage('/')
        system_info = {
            'cpu_count': cpu_count,
            'cpu_percent': cpu_percent,
            'memory': {
                'total': memory.total // gib,  # GB
                'available': memory.available // gib,  # GB
                'percent': memory.percent
            },
            'disk': {
                'total': disk.total // gib,  # GB
                'free': disk.free // gib,  # GB
                # Guard against a zero-sized filesystem report.
                'percent': (disk.used / disk.total) * 100 if disk.total else 0.0
            },
            'uptime': time.time() - psutil.boot_time()
        }
        return jsonify(system_info)
    except Exception as e:
        logger.error(f"Error getting system info: {e}")
        return jsonify({'error': str(e)}), 500
@app.errorhandler(404)
def not_found(error):
    """Answer 404 errors with a JSON body instead of an HTML error page."""
    body = jsonify({'error': 'Not found'})
    return body, 404
@app.errorhandler(500)
def internal_error(error):
    """Answer 500 errors with a generic JSON body."""
    body = jsonify({'error': 'Internal server error'})
    return body, 500
def create_templates():
    """Create HTML templates directory and files.

    Creates the ``templates`` and ``static`` directories relative to the
    current working directory (if they do not exist yet) and writes the
    dashboard page to ``templates/index.html``. An existing ``index.html`` is
    overwritten on every call.
    """
    # Imported locally: the module's import block does not provide ``Path``.
    from pathlib import Path

    templates_dir = Path('templates')
    static_dir = Path('static')
    templates_dir.mkdir(exist_ok=True)
    static_dir.mkdir(exist_ok=True)
    # Create main HTML template
    index_html = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>GPU Monitoring Dashboard</title>
<style>
:root {
--bg-color: #1a1a1a;
--card-bg: #2d2d2d;
--text-color: #ffffff;
--accent-color: #3498db;
--success-color: #2ecc71;
--warning-color: #f1c40f;
--danger-color: #e74c3c;
}
body {
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
background-color: var(--bg-color);
color: var(--text-color);
margin: 0;
padding: 20px;
}
.header {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 20px;
border-bottom: 2px solid var(--accent-color);
padding-bottom: 10px;
}
.header h1 {
margin: 0;
color: var(--accent-color);
}
.container {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 20px;
}
.card {
background-color: var(--card-bg);
border-radius: 8px;
padding: 20px;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.3);
}
.card h3 {
margin-top: 0;
color: var(--accent-color);
}
.metric-grid {
display: grid;
grid-template-columns: repeat(2, 1fr);
gap: 15px;
}
.metric {
background-color: rgba(0, 0, 0, 0.3);
padding: 15px;
border-radius: 4px;
text-align: center;
}
.metric-value {
font-size: 24px;
font-weight: bold;
margin-bottom: 5px;
}
.metric-label {
font-size: 12px;
color: #888;
text-transform: uppercase;
}
.temp-good { color: var(--success-color); }
.temp-warn { color: var(--warning-color); }
.temp-danger { color: var(--danger-color); }
.chart-container {
width: 100%;
height: 300px;
margin-top: 20px;
}
.controls {
display: flex;
gap: 10px;
margin-bottom: 10px;
}
select, button {
padding: 8px 16px;
border: none;
border-radius: 4px;
background-color: var(--accent-color);
color: white;
cursor: pointer;
font-weight: bold;
}
button:hover {
background-color: #2980b9;
}
.status-indicator {
display: inline-block;
width: 10px;
height: 10px;
border-radius: 50%;
background-color: var(--success-color);
margin-right: 5px;
}
.status-offline {
background-color: var(--danger-color);
}
@media (max-width: 768px) {
.container {
grid-template-columns: 1fr;
}
.metric-grid {
grid-template-columns: repeat(2, 1fr);
}
}
</style>
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
</head>
<body>
<div class="header">
<h1>GPU Monitoring Dashboard</h1>
<div>
<span class="status-indicator" id="status-indicator"></span>
<span id="status-text">Connecting...</span>
</div>
</div>
<div class="container">
<div class="card">
<h3>Real-time Status</h3>
<div class="metric-grid" id="metrics-grid">
<!-- Metrics will be populated by JavaScript -->
</div>
</div>
<div class="card">
<h3>Fan Control</h3>
<div class="controls">
<select id="profile-select">
<option value="">Select Profile</option>
</select>
<button onclick="setProfile()">Apply Profile</button>
</div>
<div class="controls">
<input type="number" id="manual-pwm" min="0" max="255" value="0" style="padding: 8px; border-radius: 4px; border: 1px solid #555; background: #333; color: white;">
<button onclick="setManualPWM()">Set Manual PWM</button>
</div>
<div style="margin-top: 15px;">
<div>Fan Mode: <span id="fan-mode">--</span></div>
<div>Current Profile: <span id="current-profile">--</span></div>
<div>Current PWM: <span id="current-pwm">--</span>%</div>
</div>
</div>
<div class="card" style="grid-column: 1 / -1;">
<h3>Temperature History</h3>
<div class="controls">
<select id="gpu-select">
<option value="">Select GPU</option>
</select>
<select id="hours-select">
<option value="1">1 Hour</option>
<option value="6">6 Hours</option>
<option value="24" selected>24 Hours</option>
<option value="168">7 Days</option>
</select>
<button onclick="loadHistory()">Load History</button>
</div>
<div class="chart-container">
<canvas id="temp-chart"></canvas>
</div>
</div>
</div>
<script>
let tempChart;
let updateInterval;
// Initialize chart
function initChart() {
const ctx = document.getElementById('temp-chart').getContext('2d');
tempChart = new Chart(ctx, {
type: 'line',
data: {
labels: [],
datasets: [{
label: 'Temperature (°C)',
data: [],
borderColor: '#3498db',
backgroundColor: 'rgba(52, 152, 219, 0.1)',
borderWidth: 2,
fill: true
}]
},
options: {
responsive: true,
maintainAspectRatio: false,
scales: {
y: {
beginAtZero: true,
max: 100
}
}
}
});
}
// Update real-time status
function updateStatus() {
fetch('/api/status')
.then(response => response.json())
.then(data => {
updateMetrics(data);
updateFanControl(data.fan_control);
updateStatusIndicator(true);
})
.catch(error => {
console.error('Error fetching status:', error);
updateStatusIndicator(false);
});
}
// Update metrics display
function updateMetrics(data) {
const grid = document.getElementById('metrics-grid');
grid.innerHTML = '';
for (const [gpuName, gpuData] of Object.entries(data.gpus)) {
const card = document.createElement('div');
card.className = 'card';
card.innerHTML = `
<h4>${gpuName}</h4>
<div class="metric-grid">
<div class="metric">
<div class="metric-value temp-${getTempClass(gpuData.temperature)}">${gpuData.temperature.toFixed(1)}°C</div>
<div class="metric-label">Temperature</div>
</div>
<div class="metric">
<div class="metric-value">${gpuData.load.toFixed(1)}%</div>
<div class="metric-label">Load</div>
</div>
<div class="metric">
<div class="metric-value">${gpuData.fan_speed} RPM</div>
<div class="metric-label">Fan Speed</div>
</div>
<div class="metric">
<div class="metric-value">${gpuData.power_draw.toFixed(1)} W</div>
<div class="metric-label">Power</div>
</div>
<div class="metric">
<div class="metric-value">${gpuData.memory_used}/${gpuData.memory_total} MB</div>
<div class="metric-label">VRAM</div>
</div>
<div class="metric">
<div class="metric-value">${gpuData.core_clock} MHz</div>
<div class="metric-label">Core Clock</div>
</div>
</div>
`;
grid.appendChild(card);
}
}
// Get temperature color class
function getTempClass(temp) {
if (temp < 60) return 'good';
if (temp < 75) return 'warn';
return 'danger';
}
// Update fan control display
function updateFanControl(fanData) {
document.getElementById('fan-mode').textContent = fanData.mode;
document.getElementById('current-profile').textContent = fanData.profile;
document.getElementById('current-pwm').textContent = fanData.current_pwm;
}
// Update status indicator
function updateStatusIndicator(online) {
const indicator = document.getElementById('status-indicator');
const text = document.getElementById('status-text');
if (online) {
indicator.className = 'status-indicator';
text.textContent = 'Online';
} else {
indicator.className = 'status-indicator status-offline';
text.textContent = 'Offline';
}
}
// Load fan profiles
function loadProfiles() {
fetch('/api/fan/profiles')
.then(response => response.json())
.then(data => {
const select = document.getElementById('profile-select');
select.innerHTML = '<option value="">Select Profile</option>';
for (const [name, profile] of Object.entries(data.profiles)) {
if (profile.enabled) {
const option = document.createElement('option');
option.value = name;
option.textContent = profile.name;
select.appendChild(option);
}
}
})
.catch(error => console.error('Error loading profiles:', error));
}
// Load GPU list
function loadGPUs() {
fetch('/api/gpus')
.then(response => response.json())
.then(data => {
const select = document.getElementById('gpu-select');
select.innerHTML = '<option value="">Select GPU</option>';
data.gpus.forEach(gpu => {
const option = document.createElement('option');
option.value = gpu;
option.textContent = gpu;
select.appendChild(option);
});
})
.catch(error => console.error('Error loading GPUs:', error));
}
// Set fan profile
function setProfile() {
const profile = document.getElementById('profile-select').value;
if (!profile) return;
fetch('/api/fan/profile', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({ profile: profile })
})
.then(response => response.json())
.then(data => {
if (data.success) {
alert('Profile updated successfully');
updateStatus();
} else {
alert('Error: ' + data.error);
}
})
.catch(error => {
console.error('Error:', error);
alert('Error updating profile');
});
}
// Set manual PWM
function setManualPWM() {
const pwm = parseInt(document.getElementById('manual-pwm').value);
if (isNaN(pwm) || pwm < 0 || pwm > 255) {
alert('Please enter a valid PWM value (0-255)');
return;
}
fetch('/api/fan/manual', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({ pwm: pwm })
})
.then(response => response.json())
.then(data => {
if (data.success) {
alert('Manual PWM set successfully');
updateStatus();
} else {
alert('Error: ' + data.error);
}
})
.catch(error => {
console.error('Error:', error);
alert('Error setting manual PWM');
});
}
// Load historical data
function loadHistory() {
const gpu = document.getElementById('gpu-select').value;
const hours = document.getElementById('hours-select').value;
if (!gpu) {
alert('Please select a GPU');
return;
}
fetch(`/api/history/${encodeURIComponent(gpu)}?hours=${hours}`)
.then(response => response.json())
.then(data => {
updateChart(data.data);
})
.catch(error => {
console.error('Error loading history:', error);
alert('Error loading historical data');
});
}
// Update chart with historical data
function updateChart(data) {
if (!data || data.length === 0) return;
const labels = data.map(d => new Date(d.timestamp * 1000).toLocaleTimeString());
const temps = data.map(d => d.temperature);
tempChart.data.labels = labels;
tempChart.data.datasets[0].data = temps;
tempChart.update();
}
// Initialize application
document.addEventListener('DOMContentLoaded', function() {
initChart();
loadProfiles();
loadGPUs();
updateStatus();
// Update status every 2 seconds
updateInterval = setInterval(updateStatus, 2000);
});
</script>
</body>
</html>
"""
    # The page declares charset UTF-8 and contains non-ASCII text ("°C"), so
    # write it as UTF-8 explicitly instead of the platform default encoding.
    with open(templates_dir / 'index.html', 'w', encoding='utf-8') as f:
        f.write(index_html)
if __name__ == '__main__':
    # Write the templates/static directories and the dashboard page before
    # serving (the page is regenerated on every start).
    create_templates()

    # Host, port and debug flag come from the web manager's loaded
    # configuration, each with its own fallback.
    config = web_manager.config
    run_options = {
        'host': config.get('host', '0.0.0.0'),
        'port': config.get('port', 5000),
        'debug': config.get('debug', False),
    }

    # Start the Flask development server.
    app.run(**run_options)