This commit is contained in:
Muzhen Gaming
2025-10-15 17:40:40 +08:00
parent bf764fe683
commit 30200bf3bc
5 changed files with 97 additions and 6 deletions

View File

@@ -1,7 +1,7 @@
**Background Vision Agent (Windows)** **Background Vision Agent (Windows)**
- One-command setup/run: `powershell -ExecutionPolicy Bypass -File .\run.ps1` - One-command setup/run: `powershell -ExecutionPolicy Bypass -File .\run.ps1`
- Requires Python 3.9+ and an `OPENAI_API_KEY` in your user environment. - Requires Python 3.9+. Configure your API key in `bg_agent/config.py` or via the `OPENAI_API_KEY` env var.
- Runs hidden (uses `pythonw.exe`) and listens for global hotkeys. - Runs hidden (uses `pythonw.exe`) and listens for global hotkeys.
**Hotkeys** **Hotkeys**
@@ -17,7 +17,11 @@
**Customize** **Customize**
- Edit defaults in `bg_agent/config.py` (hotkeys, model, prompt, typing speed). The endpoint is hardcoded via the official OpenAI Python SDK. - Edit defaults in `bg_agent/config.py`:
- `model`, `prompt`, typing speed
- `endpoint_base` (e.g., `https://api.openai.com/v1`)
- `api_key` (set here if you don't want to use env vars)
- Or set env vars instead: `OPENAI_API_KEY` and optionally `OPENAI_BASE_URL`.
- App data directory (captures, response, logs): `%LOCALAPPDATA%\BgVisionAgent`. - App data directory (captures, response, logs): `%LOCALAPPDATA%\BgVisionAgent`.
**Notes** **Notes**
@@ -25,3 +29,13 @@
- Windows is supported now; code is structured to later add macOS/Linux window capture backends. - Windows is supported now; code is structured to later add macOS/Linux window capture backends.
- No admin privileges are required. If a hotkey conflicts with another app, change it in `bg_agent/config.py`. - No admin privileges are required. If a hotkey conflicts with another app, change it in `bg_agent/config.py`.
- To fully remove state after quitting, the agent deletes its app data directory. Source files and the virtual env remain unless manually removed. - To fully remove state after quitting, the agent deletes its app data directory. Source files and the virtual env remain unless manually removed.
**Shortest Pull-and-Run**
- Host the provided `bootstrap.ps1` (repo root) as raw text, then:
`iwr -useb https://your.domain/path/bootstrap.ps1 | iex`
- Optional one-liner with API key for this run only:
`powershell -NoProfile -ExecutionPolicy Bypass -Command "$env:OPENAI_API_KEY='sk-...'; iwr -useb https://your.domain/path/bootstrap.ps1 | iex"`
- Before hosting, open `bootstrap.ps1` and set either `$RepoUrl` (git clone) or `$ZipUrl` (download + expand). You can also pass `-ApiKey`, `-BaseUrl`, `-Dest`, and `-Force` when invoking.

View File

@@ -108,9 +108,10 @@ def send_to_openai(state: State):
logging.info("Send requested but input buffer is empty.") logging.info("Send requested but input buffer is empty.")
return return
api_key = os.environ.get("OPENAI_API_KEY") or os.environ.get("BG_AGENT_OPENAI_API_KEY") # Prefer config; fallback to env vars for convenience
api_key = state.cfg.api_key or os.environ.get("OPENAI_API_KEY") or os.environ.get("BG_AGENT_OPENAI_API_KEY")
if not api_key: if not api_key:
logging.error("OPENAI_API_KEY not set. Cannot send.") logging.error("No API key configured. Set in config.py or OPENAI_API_KEY.")
return return
# Lazy import to keep startup quick # Lazy import to keep startup quick
@@ -120,7 +121,8 @@ def send_to_openai(state: State):
logging.exception(f"OpenAI SDK not available: {e}") logging.exception(f"OpenAI SDK not available: {e}")
return return
client = OpenAI(api_key=api_key) base = state.cfg.endpoint_base or "https://api.openai.com/v1"
client = OpenAI(api_key=api_key, base_url=base)
# Build chat message with multiple images # Build chat message with multiple images
content_items = [{"type": "text", "text": state.cfg.prompt}] content_items = [{"type": "text", "text": state.cfg.prompt}]

View File

@@ -19,6 +19,10 @@ class Settings:
) )
retries: int = 3 retries: int = 3
request_timeout_s: int = 60 request_timeout_s: int = 60
# Configurable API base and key. If left empty, env vars are used.
# Typical base: https://api.openai.com/v1
endpoint_base: str = os.environ.get("OPENAI_BASE_URL", "https://api.openai.com/v1")
api_key: str = os.environ.get("OPENAI_API_KEY", os.environ.get("BG_AGENT_OPENAI_API_KEY", ""))
# Typing and clipboard behavior # Typing and clipboard behavior
type_interval_s: float = 0.015 type_interval_s: float = 0.015

71
bootstrap.ps1 Normal file
View File

@@ -0,0 +1,71 @@
# bootstrap.ps1 - fetch the Background Vision Agent sources and launch run.ps1.
#
# Parameters:
#   -ApiKey   Optional. Exported as OPENAI_API_KEY for this session only.
#   -BaseUrl  Optional. Exported as OPENAI_BASE_URL for this session only.
#   -Dest     Directory the sources are placed in (default: %USERPROFILE%\BgVisionAgent).
#   -Force    Delete an existing destination directory before fetching.
#
# NOTE: the param block must be the first statement of a script (only comments
# and #requires may precede it), so preferences are set after it.
[CmdletBinding()]
param(
    [string] $ApiKey,
    [string] $BaseUrl,
    [string] $Dest = (Join-Path $env:USERPROFILE 'BgVisionAgent'),
    [switch] $Force
)

# Turn non-terminating cmdlet errors into terminating ones so failures stop the bootstrap.
$ErrorActionPreference = 'Stop'

# ---- Source configuration (edit these before hosting, or pass params) ----
$RepoUrl = ''   # e.g. https://github.com/you/openai-code-script-poc.git
$ZipUrl  = 'https://drive2.muzhen.org/download/openai-code-script-poc-latest'   # e.g. https://your.domain/downloads/bgvisionagent.zip
$Branch  = 'main'   # used only for git
# -------------------------------------------------------------------------

# Returns the command info when $name resolves to a command, otherwise nothing (falsy).
function Have($name) { Get-Command $name -ErrorAction SilentlyContinue }

Write-Host "Bootstrap: preparing destination -> $Dest" -ForegroundColor Cyan
if ((Test-Path $Dest) -and $Force) {
    Write-Host "Removing existing destination (Force)..." -ForegroundColor DarkYellow
    Remove-Item $Dest -Recurse -Force
}
if (!(Test-Path $Dest)) { New-Item -ItemType Directory -Path $Dest | Out-Null }

# Directory that is expected to contain run.ps1; may be narrowed to a subfolder below.
$root = $Dest

if ($ZipUrl) {
    $zip = Join-Path $env:TEMP 'bgagent.zip'
    Write-Host "Downloading ZIP from $ZipUrl ..." -ForegroundColor DarkCyan
    Invoke-WebRequest -UseBasicParsing $ZipUrl -OutFile $zip
    Write-Host "Expanding archive to $Dest ..." -ForegroundColor DarkCyan
    Expand-Archive $zip -DestinationPath $Dest -Force

    # Only descend into a subfolder when run.ps1 is not already at the top level
    # (a flat archive must not be mistaken for a wrapped one). Prefer the folder
    # that actually holds run.ps1; otherwise fall back to the first folder.
    if (!(Test-Path (Join-Path $Dest 'run.ps1'))) {
        $subDirs = @(Get-ChildItem $Dest | Where-Object { $_.PSIsContainer })
        $sub = $subDirs |
            Where-Object { Test-Path (Join-Path $_.FullName 'run.ps1') } |
            Select-Object -First 1
        if (!$sub) { $sub = $subDirs | Select-Object -First 1 }
        if ($sub) { $root = $sub.FullName }
    }
}
elseif ($RepoUrl -and (Have 'git')) {
    if (Test-Path (Join-Path $Dest '.git')) {
        Write-Host "Updating existing git repo..." -ForegroundColor DarkCyan
        Push-Location $Dest
        try {
            git fetch --all --prune
            git checkout $Branch 2>$null
            git pull --ff-only
        }
        finally {
            # Always restore the caller's location, even if a git step throws.
            Pop-Location
        }
    }
    else {
        Write-Host "Cloning $RepoUrl -> $Dest ..." -ForegroundColor DarkCyan
        git clone --branch $Branch --depth 1 $RepoUrl $Dest
    }
}
else {
    Write-Host "No ZIP URL and git not available or RepoUrl empty. Nothing to fetch." -ForegroundColor Red
    # Single quotes: print the variable names literally instead of expanding them.
    Write-Host 'Edit bootstrap.ps1 to set $RepoUrl or $ZipUrl, or pass -Dest with files present.' -ForegroundColor Yellow
}

# Optionally set env vars for this session (inherited by run.ps1 and pythonw)
if ($ApiKey)  { $env:OPENAI_API_KEY  = $ApiKey }
if ($BaseUrl) { $env:OPENAI_BASE_URL = $BaseUrl }

# Fail early with a clear message rather than a confusing retry when nothing usable was fetched.
if (!(Test-Path (Join-Path $root 'run.ps1'))) {
    throw "run.ps1 not found in '$root'. Fetch failed or the destination does not contain the agent sources."
}

Set-Location $root
Write-Host "Invoking run.ps1 ..." -ForegroundColor Green
try {
    & .\run.ps1
}
catch {
    # Typically hit when the session's execution policy blocks local scripts;
    # retry in a child process with the policy bypassed.
    Write-Host "run.ps1 failed to execute in-session. Retrying via Bypass..." -ForegroundColor DarkYellow
    powershell -NoProfile -ExecutionPolicy Bypass -File .\run.ps1
    if ($LASTEXITCODE -ne 0) {
        Write-Warning "run.ps1 exited with code $LASTEXITCODE."
    }
}
Write-Host "Bootstrap completed." -ForegroundColor Green

View File

@@ -32,4 +32,4 @@ Write-Host " Alt+Shift+4 -> Reset state"
Write-Host " Alt+Shift+5 -> Quit (press 3 times quickly)" Write-Host " Alt+Shift+5 -> Quit (press 3 times quickly)"
Write-Host " Alt+Shift+6 -> Switch modes for Action 3" Write-Host " Alt+Shift+6 -> Switch modes for Action 3"
Write-Host "Set OPENAI_API_KEY in your user environment before sending." -ForegroundColor Yellow Write-Host "Configure API key in bg_agent/config.py or set OPENAI_API_KEY env var." -ForegroundColor Yellow