diff --git a/README.md b/README.md index 90f633c..f1c783b 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,14 @@ - Or set env vars instead: `OPENAI_API_KEY` and optionally `OPENAI_BASE_URL`. - App data directory (captures, response, logs): `%LOCALAPPDATA%\BgVisionAgent`. +**Debug Logging** + +- Enable detailed step-by-step logs by setting either env var before launch: + - PowerShell: ``$env:BG_AGENT_DEBUG='1'`` (or ``$env:DEBUG='1'``) + - Cmd: ``set BG_AGENT_DEBUG=1`` +- When enabled, logs are written to `%LOCALAPPDATA%\BgVisionAgent\agent.log` at DEBUG level. +- When not enabled, only warnings/errors go to stderr; no log file is written. + **Notes** - Windows is supported now; code is structured to later add macOS/Linux window capture backends. diff --git a/bg_agent/agent.py b/bg_agent/agent.py index f73f48d..1aefb60 100644 --- a/bg_agent/agent.py +++ b/bg_agent/agent.py @@ -35,6 +35,7 @@ class State: def reset(self): # Delete captures on reset + logging.debug("State.reset: deleting %d captured images", len(self.input_images)) for p in list(self.input_images): try: if os.path.exists(p): @@ -51,14 +52,51 @@ class State: except Exception: pass +def _get_env_bool(name: str, default: bool = False) -> bool: + val = os.environ.get(name) + if val is None: + return default + val = str(val).strip().lower() + return val in {"1", "true", "yes", "on"} + def _setup_logging(log_path: str): - logging.basicConfig( - filename=log_path, - level=logging.INFO, - format="%(asctime)s [%(levelname)s] %(message)s", + """Configure logging. + + - If BG_AGENT_DEBUG or DEBUG env var is truthy, log DEBUG to file at `log_path`. + - Otherwise, log WARNING+ to stderr only (no file written). + """ + debug_enabled = _get_env_bool("BG_AGENT_DEBUG") or _get_env_bool("DEBUG") + + root = logging.getLogger() + # Clear existing handlers to avoid duplicates when re-run + for h in list(root.handlers): + root.removeHandler(h) + + fmt = logging.Formatter( + fmt="%(asctime)s [%(levelname)s] %(name)s.%(funcName)s:%(lineno)d - %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", ) - # Also log minimal errors to a rotating in-memory handler if needed + + if debug_enabled: + # Ensure directory exists for log file + try: + os.makedirs(os.path.dirname(log_path), exist_ok=True) + except Exception: + pass + fh = logging.FileHandler(log_path, encoding="utf-8") + fh.setLevel(logging.DEBUG) + fh.setFormatter(fmt) + root.addHandler(fh) + root.setLevel(logging.DEBUG) + logging.debug("Debug logging enabled; writing to %s", log_path) + else: + sh = logging.StreamHandler() + sh.setLevel(logging.WARNING) + sh.setFormatter(fmt) + root.addHandler(sh) + root.setLevel(logging.WARNING) + # No file logging when debug is off def _now_stamp() -> str: @@ -67,6 +105,7 @@ def _now_stamp() -> str: def capture_active_window(state: State): """Capture the current active window (Windows). Fallback to full screen if needed.""" + logging.debug("capture_active_window: start; captures_dir=%s", state.captures_dir) fname = f"capture-{_now_stamp()}.png" out_path = os.path.join(state.captures_dir, fname) @@ -81,6 +120,7 @@ def capture_active_window(state: State): # rect: (left, top, right, bottom) if rect and rect[2] > rect[0] and rect[3] > rect[1]: bbox = rect + logging.debug("capture_active_window: hwnd=%s rect=%s", hwnd, rect) except Exception as e: logging.warning(f"win32gui active window capture failed, fallback to full-screen: {e}") @@ -92,11 +132,13 @@ def capture_active_window(state: State): img.save(out_path, format="PNG") state.input_images.append(out_path) logging.info(f"Captured window -> {out_path}") + logging.debug("capture_active_window: end; total buffered images=%d", len(state.input_images)) except Exception as e: logging.exception(f"Capture failed: {e}") def _read_image_b64(path: str) -> str: + logging.debug("_read_image_b64: reading %s", path) with open(path, "rb") as f: b = f.read() return base64.b64encode(b).decode("ascii") @@ -104,6 +146,11 @@ def _read_image_b64(path: str) -> str: def send_to_openai(state: State): """Send images + prompt to OpenAI; store response in state.response_text. Retries on failure.""" + logging.debug( + "send_to_openai: start; images=%d prompt_len=%d", + len(state.input_images), + len(state.cfg.prompt or ""), + ) if not state.input_images: logging.info("Send requested but input buffer is empty.") return @@ -123,6 +170,7 @@ def send_to_openai(state: State): base = state.cfg.endpoint_base or "https://api.openai.com/v1" client = OpenAI(api_key=api_key, base_url=base) + logging.debug("send_to_openai: base_url=%s model=%s", base, state.cfg.model) # Build chat message with multiple images content_items = [{"type": "text", "text": state.cfg.prompt}] @@ -135,11 +183,13 @@ def send_to_openai(state: State): }) except Exception as e: logging.warning(f"Skipping image {p}: {e}") + logging.debug("send_to_openai: built content_items; count=%d", len(content_items)) attempts = max(1, state.cfg.retries) last_err = None for i in range(attempts): try: + logging.debug("send_to_openai: attempt %d/%d", i + 1, attempts) resp = client.chat.completions.create( model=state.cfg.model, messages=[{"role": "user", "content": content_items}], @@ -152,6 +202,11 @@ def send_to_openai(state: State): except Exception: pass logging.info("OpenAI response received and stored.") + logging.debug( + "send_to_openai: response_len=%d written_to=%s", + len(text), + state.response_path, + ) return except Exception as e: last_err = e @@ -171,6 +226,7 @@ def type_response(state: State): logging.info("Typing already in progress; skipping new request.") return try: + logging.debug("type_response: len=%d interval=%.3f", len(text), state.cfg.type_interval_s) pyautogui.typewrite(text, interval=state.cfg.type_interval_s) logging.info("Typed response into active field.") finally: @@ -178,6 +234,7 @@ def type_response(state: State): def _set_clip_char(state: State): + logging.debug("_set_clip_char: index=%d total_len=%d", state.clip_index, len(state.response_text)) if state.clip_index < 0 or state.clip_index >= len(state.response_text): return False ch = state.response_text[state.clip_index] @@ -193,6 +250,7 @@ def start_clipboard_mode(state: State): state.clip_index = 0 if _set_clip_char(state): logging.info("Clipboard mode primed with first character.") + logging.debug("start_clipboard_mode: primed; total_len=%d", len(text)) def on_paste_event(state: State): @@ -202,6 +260,7 @@ def on_paste_event(state: State): if not state.response_text: return state.clip_index += 1 + logging.debug("on_paste_event: advanced to index=%d", state.clip_index) if state.clip_index >= len(state.response_text): # End: clear clipboard pyperclip.copy("") @@ -211,11 +270,14 @@ def on_paste_event(state: State): def toggle_mode(state: State): + old = state.mode state.mode = 2 if state.mode == 1 else 1 logging.info(f"Switched action3 mode -> {state.mode}") + logging.debug("toggle_mode: %d -> %d", old, state.mode) def handle_action3(state: State): + logging.debug("handle_action3: mode=%d", state.mode) if state.mode == 1: type_response(state) else: @@ -225,11 +287,13 @@ def handle_action3(state: State): def reset_state(state: State): state.reset() logging.info("State reset: buffers cleared and captures removed.") + logging.debug("reset_state: done") def maybe_quit(state: State): now = time.time() state.quit_presses.append(now) + logging.debug("maybe_quit: presses=%s", list(state.quit_presses)) if len(state.quit_presses) == 3 and (state.quit_presses[-1] - state.quit_presses[0]) <= 2.0: logging.info("Triple-press detected. Quitting and cleaning up...") cleanup_and_exit(state) @@ -265,6 +329,15 @@ def cleanup_and_exit(state: State): def _bind_hotkeys(state: State): + logging.debug( + "Binding hotkeys: capture=%s send=%s action3=%s reset=%s quit=%s toggle_mode=%s", + state.cfg.shortcut_capture, + state.cfg.shortcut_send, + state.cfg.shortcut_action3, + state.cfg.shortcut_reset, + state.cfg.shortcut_quit, + state.cfg.shortcut_toggle_mode, + ) keyboard.add_hotkey(state.cfg.shortcut_capture, lambda: capture_active_window(state)) keyboard.add_hotkey(state.cfg.shortcut_send, lambda: threading.Thread(target=send_to_openai, args=(state,), daemon=True).start()) keyboard.add_hotkey(state.cfg.shortcut_action3, lambda: threading.Thread(target=handle_action3, args=(state,), daemon=True).start()) @@ -280,6 +353,14 @@ def main(): ensure_dirs(cfg) captures_dir, response_path, log_path = data_paths(cfg) _setup_logging(log_path) + logging.debug( + "main: app_dir=%s captures_dir=%s response_file=%s log_file=%s debug_env=%s", + cfg.app_dir, + captures_dir, + response_path, + log_path, + _get_env_bool("BG_AGENT_DEBUG") or _get_env_bool("DEBUG"), + ) state = State(cfg, captures_dir, response_path)