mirror of
https://github.com/yyhuni/xingrin.git
synced 2026-01-31 11:46:16 +08:00
393 lines
10 KiB
Go
393 lines
10 KiB
Go
package activity
|
|
|
|
import (
|
|
"bufio"
|
|
"context"
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"regexp"
|
|
"strings"
|
|
"sync"
|
|
"syscall"
|
|
"time"
|
|
|
|
"github.com/orbit/worker/internal/pkg"
|
|
"github.com/shirou/gopsutil/v3/cpu"
|
|
"github.com/shirou/gopsutil/v3/mem"
|
|
"go.uber.org/zap"
|
|
)
|
|
|
|
// ansiRegex recognises ANSI CSI escape sequences: an ESC byte, a '[', any run
// of digits and semicolons, and one terminating ASCII letter (for example
// "\x1b[31m" or "\x1b[2K"). It is compiled once at package scope so callers
// never pay the compilation cost per line.
var ansiRegex = regexp.MustCompile(`\x1b\[[0-9;]*[a-zA-Z]`)
|
|
|
|
// controlCharReplacer deletes a fixed set of control characters from a string
// in one pass. Each pair below maps a character to the empty string; tabs and
// newlines are not in the set and therefore pass through unchanged.
var controlCharReplacer = strings.NewReplacer(
	"\x00", "", // NUL byte
	"\r", "", // carriage return
	"\b", "", // backspace
	"\f", "", // form feed
	"\v", "", // vertical tab
)
|
|
|
|
const (
	// DefaultDirPerm is the permission mode used when creating log directories.
	DefaultDirPerm = 0755
	// ExitCodeTimeout is the exit code reported when an activity times out.
	// NOTE: it has the same value as ExitCodeError, so callers cannot tell the
	// two apart by exit code alone.
	ExitCodeTimeout = -1
	// ExitCodeError is the exit code reported when an activity fails without
	// yielding a process exit status.
	ExitCodeError = -1

	// System load thresholds.

	// DefaultCPUThreshold is the CPU usage percentage at or above which new
	// commands are held back.
	DefaultCPUThreshold = 90.0
	// DefaultMemThreshold is the memory usage percentage at or above which new
	// commands are held back.
	DefaultMemThreshold = 80.0
	// LoadCheckInterval is the pause between consecutive load checks (3 minutes).
	LoadCheckInterval = 3 * time.Minute
	// CommandStartupDelay is the initial delay before the first load check.
	CommandStartupDelay = 5 * time.Second

	// Scanner buffer sizes.

	// ScannerInitBufSize is the initial line buffer size (64 KiB).
	ScannerInitBufSize = 64 << 10
	// ScannerMaxBufSize is the largest line the scanner accepts (1 MiB).
	ScannerMaxBufSize = 1 << 20
)
|
|
|
|
// Result describes the outcome of running one activity.
type Result struct {
	// Name is the activity name, copied from Command.Name.
	Name string
	// OutputFile is the output path, copied from Command.OutputFile.
	OutputFile string
	// LogFile is the log path, copied from Command.LogFile.
	LogFile string
	// ExitCode is 0 on success, the process exit status on failure, or
	// ExitCodeError / ExitCodeTimeout when no exit status is available.
	ExitCode int
	// Duration is the elapsed wall-clock time; it stays zero when the activity
	// is abandoned before the command finishes.
	Duration time.Duration
	// Error is nil on success and describes the failure otherwise.
	Error error
}
|
|
|
|
// Command describes one external tool invocation.
type Command struct {
	// Name identifies the activity in logs and in the resulting Result.
	Name string
	// Command is the shell command line; it is executed through `sh -c`.
	Command string
	// OutputFile is carried over to the Result unchanged.
	OutputFile string
	// LogFile is the path of the per-activity log; empty disables file logging.
	LogFile string
	// Timeout bounds the execution time of the command.
	Timeout time.Duration
}
|
|
|
|
// Runner runs activities, i.e. external tools started as shell commands.
type Runner struct {
	// workDir is the working directory assigned to every spawned command.
	workDir string
}
|
|
|
|
// NewRunner creates a new activity runner
|
|
func NewRunner(workDir string) *Runner {
|
|
return &Runner{workDir: workDir}
|
|
}
|
|
|
|
// killProcessGroup terminates the entire process group
|
|
// When using shell=true (sh -c), the actual tool runs as a child of the shell.
|
|
// If we only kill the shell process, the child becomes an orphan and keeps running.
|
|
// By killing the process group, we ensure all child processes are terminated.
|
|
func killProcessGroup(cmd *exec.Cmd) {
|
|
if cmd == nil || cmd.Process == nil {
|
|
return
|
|
}
|
|
|
|
pid := cmd.Process.Pid
|
|
|
|
// Try to kill the process group first
|
|
// The negative PID signals the entire process group
|
|
if err := syscall.Kill(-pid, syscall.SIGKILL); err != nil {
|
|
pkg.Logger.Debug("Failed to kill process group, trying single process",
|
|
zap.Int("pid", pid),
|
|
zap.Error(err))
|
|
// Fallback: kill single process
|
|
_ = cmd.Process.Kill()
|
|
} else {
|
|
pkg.Logger.Debug("Killed process group", zap.Int("pgid", pid))
|
|
}
|
|
}
|
|
|
|
// waitForSystemLoad blocks until system CPU and memory usage are below thresholds.
|
|
// This prevents OOM when starting multiple concurrent commands.
|
|
func waitForSystemLoad(ctx context.Context) {
|
|
// Initial delay to let previous commands consume resources
|
|
select {
|
|
case <-ctx.Done():
|
|
return
|
|
case <-time.After(CommandStartupDelay):
|
|
}
|
|
|
|
for {
|
|
cpu, mem, err := getSystemLoad()
|
|
if err != nil {
|
|
pkg.Logger.Debug("Failed to get system load, skipping check", zap.Error(err))
|
|
return
|
|
}
|
|
|
|
if cpu < DefaultCPUThreshold && mem < DefaultMemThreshold {
|
|
return
|
|
}
|
|
|
|
pkg.Logger.Info("System load high, waiting before starting command",
|
|
zap.Float64("cpu", cpu),
|
|
zap.Float64("cpuThreshold", DefaultCPUThreshold),
|
|
zap.Float64("mem", mem),
|
|
zap.Float64("memThreshold", DefaultMemThreshold))
|
|
|
|
select {
|
|
case <-ctx.Done():
|
|
return
|
|
case <-time.After(LoadCheckInterval):
|
|
}
|
|
}
|
|
}
|
|
|
|
// getSystemLoad returns current CPU and memory usage percentages using gopsutil.
|
|
// Correctly handles container cgroup limits.
|
|
func getSystemLoad() (cpuPercent, memPercent float64, err error) {
|
|
// Get CPU usage (0 interval means since last call, false means aggregate all CPUs)
|
|
cpuPercents, err := cpu.Percent(0, false)
|
|
if err != nil {
|
|
return 0, 0, fmt.Errorf("failed to get CPU usage: %w", err)
|
|
}
|
|
if len(cpuPercents) == 0 {
|
|
return 0, 0, fmt.Errorf("no CPU data available")
|
|
}
|
|
|
|
// Get memory usage
|
|
memInfo, err := mem.VirtualMemory()
|
|
if err != nil {
|
|
return 0, 0, fmt.Errorf("failed to get memory usage: %w", err)
|
|
}
|
|
|
|
return cpuPercents[0], memInfo.UsedPercent, nil
|
|
}
|
|
|
|
// Run executes a single activity with streaming output
|
|
func (r *Runner) Run(ctx context.Context, cmd Command) *Result {
|
|
start := time.Now()
|
|
result := &Result{
|
|
Name: cmd.Name,
|
|
OutputFile: cmd.OutputFile,
|
|
LogFile: cmd.LogFile,
|
|
}
|
|
|
|
if ctx.Err() != nil {
|
|
result.Error = fmt.Errorf("context cancelled before execution: %w", ctx.Err())
|
|
result.ExitCode = ExitCodeError
|
|
return result
|
|
}
|
|
|
|
// Wait for system load to be acceptable before starting
|
|
waitForSystemLoad(ctx)
|
|
if ctx.Err() != nil {
|
|
result.Error = fmt.Errorf("context cancelled while waiting for system load: %w", ctx.Err())
|
|
result.ExitCode = ExitCodeError
|
|
return result
|
|
}
|
|
|
|
execCtx, cancel := context.WithTimeout(ctx, cmd.Timeout)
|
|
defer cancel()
|
|
|
|
execCmd := exec.CommandContext(execCtx, "sh", "-c", cmd.Command)
|
|
execCmd.Dir = r.workDir
|
|
// Create new process group so we can kill all child processes
|
|
execCmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
|
|
|
|
// Setup pipes for streaming
|
|
stdout, err := execCmd.StdoutPipe()
|
|
if err != nil {
|
|
result.Error = fmt.Errorf("failed to create stdout pipe: %w", err)
|
|
result.ExitCode = ExitCodeError
|
|
return result
|
|
}
|
|
|
|
stderr, err := execCmd.StderrPipe()
|
|
if err != nil {
|
|
result.Error = fmt.Errorf("failed to create stderr pipe: %w", err)
|
|
result.ExitCode = ExitCodeError
|
|
return result
|
|
}
|
|
|
|
// Prepare log file
|
|
logFile := r.prepareLogFile(cmd)
|
|
if logFile != nil {
|
|
defer func() { _ = logFile.Close() }()
|
|
r.writeLogHeader(logFile, cmd)
|
|
}
|
|
|
|
// Start command
|
|
if err := execCmd.Start(); err != nil {
|
|
result.Error = fmt.Errorf("failed to start command: %w", err)
|
|
result.ExitCode = ExitCodeError
|
|
return result
|
|
}
|
|
|
|
// Ensure process cleanup on any exit path
|
|
defer killProcessGroup(execCmd)
|
|
|
|
// Stream output
|
|
var wg sync.WaitGroup
|
|
wg.Add(2)
|
|
|
|
go r.streamOutput(&wg, stdout, logFile, cmd.Name, "stdout")
|
|
go r.streamOutput(&wg, stderr, logFile, cmd.Name, "stderr")
|
|
|
|
wg.Wait()
|
|
|
|
// Wait for command to finish
|
|
err = execCmd.Wait()
|
|
result.Duration = time.Since(start)
|
|
|
|
// Write duration to log
|
|
if logFile != nil {
|
|
r.writeLogFooter(logFile, result)
|
|
}
|
|
|
|
// Handle result
|
|
if execCtx.Err() == context.DeadlineExceeded {
|
|
result.Error = fmt.Errorf("activity execution timeout after %v", cmd.Timeout)
|
|
result.ExitCode = ExitCodeTimeout
|
|
pkg.Logger.Error("Activity timeout",
|
|
zap.String("activity", cmd.Name),
|
|
zap.Duration("timeout", cmd.Timeout))
|
|
return result
|
|
}
|
|
|
|
if err != nil {
|
|
if exitErr, ok := err.(*exec.ExitError); ok {
|
|
result.ExitCode = exitErr.ExitCode()
|
|
} else {
|
|
result.ExitCode = ExitCodeError
|
|
}
|
|
result.Error = fmt.Errorf("activity execution failed: %w", err)
|
|
pkg.Logger.Error("Activity failed",
|
|
zap.String("activity", cmd.Name),
|
|
zap.Int("exitCode", result.ExitCode),
|
|
zap.Error(err))
|
|
return result
|
|
}
|
|
|
|
result.ExitCode = 0
|
|
pkg.Logger.Info("Activity completed",
|
|
zap.String("activity", cmd.Name),
|
|
zap.Duration("duration", result.Duration))
|
|
|
|
return result
|
|
}
|
|
|
|
// RunParallel executes multiple activities in parallel
|
|
func (r *Runner) RunParallel(ctx context.Context, commands []Command) []*Result {
|
|
if len(commands) == 0 {
|
|
return nil
|
|
}
|
|
|
|
results := make([]*Result, len(commands))
|
|
var wg sync.WaitGroup
|
|
|
|
for i, cmd := range commands {
|
|
if ctx.Err() != nil {
|
|
results[i] = &Result{
|
|
Name: cmd.Name,
|
|
ExitCode: ExitCodeError,
|
|
Error: fmt.Errorf("context cancelled: %w", ctx.Err()),
|
|
}
|
|
continue
|
|
}
|
|
|
|
wg.Add(1)
|
|
go func(idx int, c Command) {
|
|
defer wg.Done()
|
|
results[idx] = r.Run(ctx, c)
|
|
}(i, cmd)
|
|
}
|
|
|
|
wg.Wait()
|
|
return results
|
|
}
|
|
|
|
func (r *Runner) prepareLogFile(cmd Command) *os.File {
|
|
if cmd.LogFile == "" {
|
|
return nil
|
|
}
|
|
|
|
dir := filepath.Dir(cmd.LogFile)
|
|
if err := os.MkdirAll(dir, DefaultDirPerm); err != nil {
|
|
pkg.Logger.Warn("Failed to create log directory",
|
|
zap.String("activity", cmd.Name),
|
|
zap.Error(err))
|
|
return nil
|
|
}
|
|
|
|
f, err := os.Create(cmd.LogFile)
|
|
if err != nil {
|
|
pkg.Logger.Warn("Failed to create log file",
|
|
zap.String("activity", cmd.Name),
|
|
zap.Error(err))
|
|
return nil
|
|
}
|
|
|
|
return f
|
|
}
|
|
|
|
func (r *Runner) streamOutput(wg *sync.WaitGroup, reader io.Reader, logFile *os.File, activityName, streamName string) {
|
|
defer wg.Done()
|
|
|
|
scanner := bufio.NewScanner(reader)
|
|
scanner.Buffer(make([]byte, ScannerInitBufSize), ScannerMaxBufSize)
|
|
|
|
for scanner.Scan() {
|
|
line := cleanLine(scanner.Text())
|
|
if line == "" {
|
|
continue
|
|
}
|
|
|
|
// Write to log file
|
|
if logFile != nil {
|
|
_, _ = fmt.Fprintln(logFile, line)
|
|
}
|
|
|
|
// Log debug output (optional, can be removed for less verbose logs)
|
|
pkg.Logger.Debug("Activity output",
|
|
zap.String("activity", activityName),
|
|
zap.String("stream", streamName),
|
|
zap.String("line", line))
|
|
}
|
|
|
|
if err := scanner.Err(); err != nil {
|
|
pkg.Logger.Warn("Error reading output stream",
|
|
zap.String("activity", activityName),
|
|
zap.String("stream", streamName),
|
|
zap.Error(err))
|
|
}
|
|
}
|
|
|
|
// cleanLine removes ANSI escape sequences and control characters from output
|
|
func cleanLine(line string) string {
|
|
line = ansiRegex.ReplaceAllString(line, "")
|
|
line = controlCharReplacer.Replace(line)
|
|
return strings.TrimSpace(line)
|
|
}
|
|
|
|
// logSeparator is the horizontal rule that frames the header and footer
// sections of an activity log file.
const logSeparator = "============================================================"
|
|
|
|
func (r *Runner) writeLogHeader(f *os.File, cmd Command) {
|
|
_, _ = fmt.Fprintf(f, "$ %s\n", cmd.Command)
|
|
_, _ = fmt.Fprintln(f, logSeparator)
|
|
_, _ = fmt.Fprintf(f, "# Tool: %s\n", cmd.Name)
|
|
_, _ = fmt.Fprintf(f, "# Started: %s\n", time.Now().Format("2006-01-02 15:04:05"))
|
|
_, _ = fmt.Fprintf(f, "# Timeout: %v\n", cmd.Timeout)
|
|
_, _ = fmt.Fprintln(f, "# Status: Running...")
|
|
_, _ = fmt.Fprintln(f, logSeparator)
|
|
_, _ = fmt.Fprintln(f)
|
|
}
|
|
|
|
func (r *Runner) writeLogFooter(f *os.File, result *Result) {
|
|
status := "✓ Success"
|
|
if result.ExitCode != 0 {
|
|
status = "✗ Failed"
|
|
}
|
|
|
|
_, _ = fmt.Fprintln(f)
|
|
_, _ = fmt.Fprintln(f, logSeparator)
|
|
_, _ = fmt.Fprintf(f, "# Finished: %s\n", time.Now().Format("2006-01-02 15:04:05"))
|
|
_, _ = fmt.Fprintf(f, "# Duration: %.2fs\n", result.Duration.Seconds())
|
|
_, _ = fmt.Fprintf(f, "# Exit Code: %d\n", result.ExitCode)
|
|
_, _ = fmt.Fprintf(f, "# Status: %s\n", status)
|
|
_, _ = fmt.Fprintln(f, logSeparator)
|
|
}
|