commit 84a9c327bcf4288fbafdcad5315dcf277bae74be
parent b1738d8d282ca60a93ad65b88b9f0959537ad129
Author: Pablo Murad <pblmrd@gmail.com>
Date: Fri, 29 May 2026 19:14:46 -0300
xerxes
Diffstat:
13 files changed, 705 insertions(+), 300 deletions(-)
diff --git a/.env.example b/.env.example
@@ -9,48 +9,54 @@ OPENAI_API_KEY=
# YOUTUBE_PO_TOKEN=android.gvs+XXX
# ---------------------------------------------------------------------------
-# Modelos OpenAI (opcional, defaults sao seguros)
+# Modelos OpenAI (default = perfil economico / rapido)
# ---------------------------------------------------------------------------
-# Modelo de chat usado para sumario, conversao para PT-BR e deteccao de tipo.
-# Default: gpt-5-mini. Alternativas: gpt-5, gpt-5-nano, gpt-4.1, gpt-4.1-mini, gpt-4o-mini.
-# OPENAI_CHAT_MODEL=gpt-5-mini
+# Chat: sumario, conversao PT-BR, deteccao de tipo e capitulos. Default: gpt-4o-mini (barato e rapido).
+# Alternativas: gpt-5-nano, gpt-5-mini, gpt-4.1-mini.
+# OPENAI_CHAT_MODEL=gpt-4o-mini
-# Modelo padrao de transcricao (saida em texto).
-# Default: gpt-4o-mini-transcribe. Alternativas: gpt-4o-transcribe, whisper-1.
+# Transcricao. Default: gpt-4o-mini-transcribe.
# OPENAI_TRANSCRIBE_MODEL=gpt-4o-mini-transcribe
-# Modelo usado quando precisamos de timestamps (verbose_json) para gerar capitulos.
-# Default: whisper-1 (ainda e o mais confiavel para segments com start/end).
+# Timestamps para capitulos (verbose_json). Default: whisper-1.
# OPENAI_TRANSCRIBE_TIMESTAMPS_MODEL=whisper-1
-# Liga/desliga geracao de capitulos com timestamps em audio/video.
-# OPENAI_ENABLE_CHAPTERS=true
+# Capitulos com timestamps (STT mais lento). Default: false.
+# OPENAI_ENABLE_CHAPTERS=false
-# Liga/desliga sumario estruturado (TL;DR, key points, decisoes, action items, etc).
-# Quando false, mantem o sumario textual legado.
+# Sumario estruturado (SmartSummary). Default: true.
# OPENAI_ENABLE_SMART_SUMMARY=true
-# Esforco de raciocinio para modelos da familia gpt-5/o-series.
-# Valores validos: minimal, low, medium, high. Default: medium.
-# OPENAI_REASONING_EFFORT=medium
+# Raciocinio para modelos gpt-5/o-series. Default: minimal.
+# OPENAI_REASONING_EFFORT=minimal
# ---------------------------------------------------------------------------
-# Preset global e sumarizacao hierarquica (textos longos)
+# Preset global e sumarizacao
# ---------------------------------------------------------------------------
-# Preset global (unico controlo): economico | equilibrado | maximo.
-# Mapeia defaults de chat, transcricao e reasoning (sobrescritos por OPENAI_* quando definidos).
-# LAZIER_QUALITY_PRESET=equilibrado
+# economico | equilibrado | maximo. Default: economico.
+# LAZIER_QUALITY_PRESET=economico
-# Sumario map-reduce com overlap para transcricoes acima do limiar (caracteres).
# LAZIER_SUMMARY_HIERARCHICAL=true
-# LAZIER_SUMMARY_DIRECT_MAX_CHARS=32000
-# LAZIER_SUMMARY_MAP_CHUNK_CHARS=14000
-# LAZIER_SUMMARY_CHUNK_OVERLAP_CHARS=1400
+# LAZIER_SUMMARY_DIRECT_MAX_CHARS=48000
+# LAZIER_SUMMARY_MAP_CHUNK_CHARS=16000
+# LAZIER_SUMMARY_CHUNK_OVERLAP_CHARS=800
# ---------------------------------------------------------------------------
-# Spike STT / diarizacao (opcional, sem segundo fornecedor ligado por defeito)
+# Performance (menos chamadas = mais rapido e barato)
# ---------------------------------------------------------------------------
+# Revisao ortografica extra apos a conversao para PT-BR (1 ou mais chamadas chat). Default: false.
+# LAZIER_ENABLE_PT_POLISH=false
+
+# Classificador de tipo antes do sumario (1 chamada chat). Default: false.
+# LAZIER_DETECT_CONTENT_TYPE=false
+
+# Numero de workers paralelos para chunks de audio (STT) e de sumario. Default: 3 (minimo 1).
+# LAZIER_STT_PARALLEL_WORKERS=3
+# LAZIER_SUMMARY_PARALLEL_WORKERS=3
+
+# LAZIER_ALWAYS_SUMMARY=false
+# LAZIER_DIARIZATION_PROVIDER=none
# LAZIER_ALT_STT_ENABLED=false
diff --git a/README.md b/README.md
@@ -28,18 +28,24 @@ Defaults seguros já vêm configurados; sobrescreva apenas se quiser mudar.
| Variável | Default | Descrição |
|----------|---------|-----------|
-| `OPENAI_CHAT_MODEL` | `gpt-5-mini` | Modelo de chat usado para sumário, conversão para PT-BR, detecção de tipo e capítulos. Alternativas: `gpt-5`, `gpt-5-nano`, `gpt-4.1`, `gpt-4o-mini`. |
+| `OPENAI_CHAT_MODEL` | `gpt-4o-mini` | Modelo de chat usado para sumário, conversão para PT-BR, detecção de tipo e capítulos. Alternativas: `gpt-5-mini`, `gpt-5-nano`, `gpt-4.1-mini`. |
| `OPENAI_TRANSCRIBE_MODEL` | `gpt-4o-mini-transcribe` | Modelo padrão de transcrição (saída em texto). Alternativas: `gpt-4o-transcribe`, `whisper-1`. |
| `OPENAI_TRANSCRIBE_TIMESTAMPS_MODEL` | `whisper-1` | Modelo usado quando precisamos de `verbose_json` para gerar capítulos. Atualmente `whisper-1` é o mais confiável para retornar `start`/`end`. |
| `OPENAI_ENABLE_SMART_SUMMARY` | `true` | Liga/desliga o sumário estruturado (TL;DR, pontos-chave, decisões, ações, tópicos, citações, perguntas em aberto). Quando `false`, mantém o sumário textual legado. |
-| `OPENAI_ENABLE_CHAPTERS` | `true` | Liga/desliga geração de capítulos com timestamps em áudio/vídeo. |
-| `OPENAI_REASONING_EFFORT` | `medium` | Esforço de raciocínio para modelos da família `gpt-5`/`o-series`. Valores: `minimal`, `low`, `medium`, `high`. |
-| `LAZIER_QUALITY_PRESET` | `equilibrado` | Preset global `economico` / `equilibrado` / `maximo` → defaults de chat, transcrição e `reasoning` (sobrescritos por `OPENAI_*` quando definidos). Defina apenas no `.env`; não há seleção na WebGUI nem campo por pedido na API. |
+| `OPENAI_ENABLE_CHAPTERS` | `false` | Capítulos com timestamps (exige STT `verbose_json`, mais lento). |
+| `OPENAI_REASONING_EFFORT` | `minimal` | Esforço de raciocínio para modelos da família `gpt-5`/`o-series`. Valores: `minimal`, `low`, `medium`, `high`. |
+| `LAZIER_QUALITY_PRESET` | `economico` | Preset global `economico` / `equilibrado` / `maximo` → defaults de chat, transcrição e `reasoning` (sobrescritos por `OPENAI_*` quando definidos). |
| `LAZIER_SUMMARY_HIERARCHICAL` | `true` | Acima de `LAZIER_SUMMARY_DIRECT_MAX_CHARS`, o sumário inteligente usa map-reduce com chunks e overlap em vez de um único passe sobre o texto completo. |
-| `LAZIER_SUMMARY_DIRECT_MAX_CHARS` | `32000` | Limiar (caracteres) para ativar sumarização hierárquica. |
-| `LAZIER_SUMMARY_MAP_CHUNK_CHARS` | `14000` | Tamanho alvo de cada chunk no map. |
-| `LAZIER_SUMMARY_CHUNK_OVERLAP_CHARS` | `1400` | Sobreposição entre chunks consecutivos. |
-| `LAZIER_ALT_STT_ENABLED` | `false` | Reserva para spike de segundo STT ou diarização; incluída para documentar critérios go/no-go sem ligar fornecedor alternativo por defeito. |
+| `LAZIER_SUMMARY_DIRECT_MAX_CHARS` | `48000` | Limiar (caracteres) para ativar sumarização hierárquica. |
+| `LAZIER_SUMMARY_MAP_CHUNK_CHARS` | `16000` | Tamanho alvo de cada chunk no map. |
+| `LAZIER_SUMMARY_CHUNK_OVERLAP_CHARS` | `800` | Sobreposição entre chunks consecutivos. |
+| `LAZIER_ENABLE_PT_POLISH` | `false` | Revisão ortográfica extra após a conversão para PT-BR (1 ou mais chamadas chat adicionais). |
+| `LAZIER_DETECT_CONTENT_TYPE` | `false` | Classificador de tipo antes do sumário (1 chamada chat). |
+| `LAZIER_STT_PARALLEL_WORKERS` | `3` | Transcrição paralela de chunks de áudio. |
+| `LAZIER_SUMMARY_PARALLEL_WORKERS` | `3` | Sumário inteligente paralelo por chunk. |
+| `LAZIER_ALT_STT_ENABLED` | `false` | Reserva para spike de segundo STT ou diarização. |
+| `LAZIER_ALWAYS_SUMMARY` | `false` | Se `true`, força geração de sumário mesmo quando o modo pedido é só `transcribe` (override de instalação). |
+| `LAZIER_DIARIZATION_PROVIDER` | `none` | Diarização opcional de falantes (`none`, ou provedores futuros). |
Opcional: `YOUTUBE_PO_TOKEN` para melhor suporte a alguns vídeos do YouTube ([guia](https://github.com/yt-dlp/yt-dlp/wiki/PO-Token-Guide)).
@@ -93,16 +99,20 @@ Acesse **http://localhost:19283** (ou use `--port` para outra porta).
### CLI
```bash
-# Transcrição (gera o texto em PT-BR)
+# Só transcrição (texto completo em PT-BR — modo mais económico; é o default da API)
lazier transcribe audio.mp3
lazier transcribe video.mp4
lazier transcribe "https://www.youtube.com/watch?v=VIDEO_ID"
-# Sumário (texto + sumário inteligente + capítulos quando aplicável)
+# Só sumário (STT/extração internos; exporta apenas o sumário)
lazier summarize document.pdf
lazier summarize "https://example.com/artigo" --format md
lazier summarize aula.mp3 --gpt-model gpt-5 --reasoning high
-# Desligar features novas explicitamente
+
+# Transcrição + sumário (dois artefactos; maior consumo de tokens)
+lazier process aula.mp3 --format md
+
+# Desligar features opcionais
lazier summarize aula.mp3 --no-smart --no-chapters
# Outras opções
@@ -122,6 +132,28 @@ Flags principais:
O pacote custo/qualidade (`economico` / `equilibrado` / `maximo`) vem só de `LAZIER_QUALITY_PRESET` no `.env` (`lazier config` mostra o valor atual).
+### Economia de tokens
+
+| Modo | O que gera | Notas |
+|------|------------|-------|
+| `transcribe` | Transcrição PT-BR | Não chama sumário inteligente nem detecção de tipo de conteúdo. |
+| `summarize` | Sumário PT-BR | Áudio/vídeo ainda passam por STT; transcrição completa não é exportada. |
+| `process` | Transcrição + sumário | Dois ficheiros na WebGUI; maior consumo de tokens de chat (conversão PT-BR + sumário). |
+
+Na WebGUI escolha **Transcrição**, **Sumário** ou **Ambos**. Na API, omitir `mode` equivale a `transcribe`. Use `"mode": "process"` ou `transcribe: true, summarize: true` para os dois.
+
+### Velocidade e custo (defaults)
+
+O perfil **económico**, ativo por defeito, prioriza baixa latência e baixo custo:
+
+- **`gpt-4o-mini`** para chat (conversão PT-BR e sumário).
+- **Capítulos desligados** — evita STT com `whisper-1` + geração de capítulos.
+- **Sem polish nem detecção de tipo** — poupa 1–3 chamadas chat por job.
+- **Áudio normalizado** para mono 16 kHz (MP3 64 kbps) antes do STT, quando o ffmpeg está disponível (ficheiros menores).
+- **Chunks STT/sumário em paralelo** (3 workers por defeito; configurável via `LAZIER_STT_PARALLEL_WORKERS` e `LAZIER_SUMMARY_PARALLEL_WORKERS`).
+
+Para máxima qualidade: `LAZIER_QUALITY_PRESET=maximo`, `OPENAI_ENABLE_CHAPTERS=true`, `LAZIER_ENABLE_PT_POLISH=true`.
+
### WebGUI
Acesse http://localhost:19283 após iniciar com `lazier web` ou Docker. Na primeira vez, faça login com o usuário e senha definidos em `ADMIN_USER` e `ADMIN_PASSWORD` no `.env` (se configurados).
@@ -134,7 +166,17 @@ Acesse http://localhost:19283 após iniciar com `lazier web` ou Docker. Na prime
{
"url": "https://example.com/aula.mp3",
"format": "md",
- "mode": "summarize",
+ "mode": "summarize"
+}
+```
+
+Modos: `transcribe` (default), `summarize`, `process` (ambos). Overrides opcionais:
+
+```json
+{
+ "url": "https://example.com/aula.mp3",
+ "format": "md",
+ "mode": "process",
"chat_model": "gpt-5",
"transcribe_model": "gpt-4o-transcribe",
"smart": true,
diff --git a/lazier/api/routes.py b/lazier/api/routes.py
@@ -4,6 +4,7 @@ Rotas da API FastAPI.
import logging
import os
+import shutil
import tempfile
import uuid
import zipfile
@@ -11,7 +12,7 @@ from datetime import datetime
from pathlib import Path
from typing import List, Optional, Tuple
-from fastapi import APIRouter, BackgroundTasks, File, Form, HTTPException, UploadFile
+from fastapi import APIRouter, BackgroundTasks, File, Form, HTTPException, Query, UploadFile
from fastapi.responses import FileResponse
from pydantic import BaseModel
@@ -31,6 +32,8 @@ router = APIRouter()
UPLOAD_DIR = Path(os.getenv("LAZIER_UPLOAD_DIR", "/app/uploads"))
UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
+VALID_EXPORT_FORMATS = frozenset({"docx", "txt", "md", "json", "pdf"})
+
class ProcessRequest(BaseModel):
"""Request para processar URL."""
@@ -55,12 +58,18 @@ def _resolve_mode(
if mode not in {"transcribe", "summarize", "process"}:
raise HTTPException(
status_code=400,
- detail="Modo invalido. Use 'process' (padrao), 'transcribe' ou 'summarize'.",
+ detail="Modo invalido. Use 'transcribe' (padrao), 'summarize' ou 'process'.",
)
return mode
if transcribe is None and summarize is None:
- return "process"
+ return "transcribe"
+
+ if transcribe is False and summarize is False:
+ raise HTTPException(
+ status_code=400,
+ detail="Escolha transcrição, sumário ou ambos (transcribe/summarize/process).",
+ )
if transcribe and summarize:
return "process"
@@ -69,18 +78,125 @@ def _resolve_mode(
if summarize and not transcribe:
return "summarize"
- return "process"
+ return "transcribe"
+
+
+def _normalize_export_format(format_type: Optional[str]) -> str:
+ fmt = (format_type or "docx").strip().lower()
+ if fmt == "markdown":
+ fmt = "md"
+ if fmt not in VALID_EXPORT_FORMATS:
+ raise HTTPException(
+ status_code=400,
+ detail=f"Formato invalido: {format_type}. Use: docx, txt, md, json, pdf.",
+ )
+ return fmt
+
+
+def _job_stored_format(job: dict) -> str:
+ return _normalize_export_format(job.get("format") or "docx")
-def _download_filename(job: dict, artifact_kind: str) -> str:
+def _download_filename(job: dict, artifact_kind: str, format_type: Optional[str] = None) -> str:
return build_export_filename(
job.get("metadata", {}),
- job.get("format", "docx"),
+ _normalize_export_format(format_type or job.get("format")),
source_name=job.get("source_name"),
artifact_kind=artifact_kind,
)
+def _export_artifact_to_path(job: dict, artifact_kind: str, output_path: Path, format_type: str) -> None:
+ metadata = job.get("metadata", {}) or {}
+ fmt = _normalize_export_format(format_type)
+ if artifact_kind == "transcription":
+ export(
+ transcription=job["transcription"],
+ summary=None,
+ metadata=metadata,
+ output_path=str(output_path),
+ format_type=fmt,
+ )
+ elif artifact_kind == "summary":
+ export(
+ transcription="",
+ summary=job["summary"],
+ metadata=metadata,
+ output_path=str(output_path),
+ format_type=fmt,
+ )
+ else:
+ raise ValueError(f"Artefato invalido: {artifact_kind}")
+
+
+def _resolve_download_path(
+ job: dict,
+ artifact_kind: str,
+ format_type: Optional[str] = None,
+) -> Tuple[Optional[str], bool]:
+ """Resolve caminho de download. Retorna (path, efemero)."""
+
+ if artifact_kind == "result":
+ mode = job.get("mode")
+ artifact_kind = "transcription" if mode == "transcribe" else "summary"
+
+ fmt = _normalize_export_format(format_type or job.get("format"))
+ job_fmt = _job_stored_format(job)
+
+ if artifact_kind == "transcription" and not job.get("transcription"):
+ return None, False
+ if artifact_kind == "summary" and not job.get("summary"):
+ return None, False
+
+ path_key = {"transcription": "transcription_path", "summary": "summary_path"}.get(artifact_kind)
+ if not path_key:
+ return None, False
+
+ if fmt == job_fmt:
+ existing_path = job.get(path_key)
+ if existing_path and Path(existing_path).exists():
+ return existing_path, False
+
+ output_path = build_job_artifact_path(
+ job_id=job["id"],
+ source_name=job.get("source_name"),
+ format_type=fmt,
+ artifact_kind=artifact_kind,
+ created_at=job.get("created_at"),
+ metadata=job.get("metadata", {}),
+ )
+ _export_artifact_to_path(job, artifact_kind, output_path, fmt)
+ get_job_store().update_job(job["id"], **{path_key: str(output_path)})
+ return str(output_path), False
+
+ tmp_dir = Path(tempfile.mkdtemp(prefix="lazier-dl-"))
+ filename = build_export_filename(
+ job.get("metadata", {}),
+ fmt,
+ source_name=job.get("source_name"),
+ artifact_kind=artifact_kind,
+ )
+ output_path = tmp_dir / filename
+ _export_artifact_to_path(job, artifact_kind, output_path, fmt)
+ return str(output_path), True
+
+
+def _cleanup_download_path(path: str, ephemeral: bool) -> None:
+ if not ephemeral or not path:
+ return
+ file_path = Path(path)
+ parent = file_path.parent
+ if parent.name.startswith("lazier-dl-"):
+ shutil.rmtree(parent, ignore_errors=True)
+ elif file_path.exists():
+ file_path.unlink(missing_ok=True)
+
+
+def _ensure_download_file(job: dict, artifact_kind: str, format_type: Optional[str] = None) -> Optional[str]:
+ path, _ephemeral = _resolve_download_path(job, artifact_kind, format_type)
+ return path
+
+
def _job_title(job: dict) -> str:
metadata = job.get("metadata", {})
if metadata.get("title"):
@@ -228,67 +344,14 @@ def _process_job(job_id: str) -> None:
broadcast_progress(job_id, 0, "failed", str(exc))
-def _ensure_download_file(job: dict, artifact_kind: str) -> Optional[str]:
- existing_path_key = {
- "transcription": "transcription_path",
- "summary": "summary_path",
- "result": "result_path",
- }[artifact_kind]
- existing_path = job.get(existing_path_key)
- if existing_path and Path(existing_path).exists():
- return existing_path
-
- if artifact_kind == "transcription" and job.get("transcription"):
- output_path = build_job_artifact_path(
- job_id=job["id"],
- source_name=job.get("source_name"),
- format_type=job.get("format", "docx"),
- artifact_kind="transcription",
- created_at=job.get("created_at"),
- metadata=job.get("metadata", {}),
- )
- export(
- transcription=job["transcription"],
- summary=None,
- metadata=job.get("metadata", {}),
- output_path=str(output_path),
- format_type=job.get("format", "docx"),
- )
- get_job_store().update_job(job["id"], transcription_path=str(output_path))
- return str(output_path)
-
- if artifact_kind == "summary" and job.get("summary"):
- output_path = build_job_artifact_path(
- job_id=job["id"],
- source_name=job.get("source_name"),
- format_type=job.get("format", "docx"),
- artifact_kind="summary",
- created_at=job.get("created_at"),
- metadata=job.get("metadata", {}),
- )
- export(
- transcription="",
- summary=job["summary"],
- metadata=job.get("metadata", {}),
- output_path=str(output_path),
- format_type=job.get("format", "docx"),
- )
- get_job_store().update_job(job["id"], summary_path=str(output_path))
- return str(output_path)
-
- if artifact_kind == "result":
- mode = job.get("mode")
- preferred_key = "transcription" if mode == "transcribe" else "summary"
- return _ensure_download_file(job, preferred_key)
-
- return None
-
-
-def _require_distinct_transcription_and_summary_paths(job: dict) -> Tuple[str, str]:
- """Resolve paths for bundle ZIP or raise HTTPException."""
+def _require_distinct_transcription_and_summary_paths(
+ job: dict,
+ format_type: Optional[str] = None,
+) -> Tuple[str, str, bool, bool]:
+ """Resolve paths for bundle ZIP. Retorna (tx, sm, tx_ephemeral, sm_ephemeral)."""
- tx_path = _ensure_download_file(job, "transcription")
- sm_path = _ensure_download_file(job, "summary")
+ tx_path, tx_tmp = _resolve_download_path(job, "transcription", format_type)
+ sm_path, sm_tmp = _resolve_download_path(job, "summary", format_type)
if not tx_path or not sm_path:
raise HTTPException(
status_code=404,
@@ -299,7 +362,7 @@ def _require_distinct_transcription_and_summary_paths(job: dict) -> Tuple[str, s
status_code=400,
detail="Transcricao e sumario referem-se ao mesmo ficheiro; pacote ZIP indisponivel.",
)
- return (tx_path, sm_path)
+ return tx_path, sm_path, tx_tmp, sm_tmp
def _unlink_quiet(path: str) -> None:
@@ -453,63 +516,85 @@ async def get_job_details(job_id: str):
@router.get("/jobs/{job_id}/transcription")
-async def download_transcription(job_id: str):
- """Download da transcricao."""
+async def download_transcription(
+ job_id: str,
+ background_tasks: BackgroundTasks,
+ format: Optional[str] = Query(None, description="docx, txt, md, json ou pdf"),
+):
+ """Download da transcricao no formato pedido."""
job = get_job_store().get_job(job_id)
if not job:
raise HTTPException(status_code=404, detail="Job nao encontrado")
- download_path = _ensure_download_file(job, "transcription")
+ download_path, ephemeral = _resolve_download_path(job, "transcription", format)
if not download_path:
raise HTTPException(status_code=404, detail="Transcricao nao disponivel")
- filename = _download_filename(job, "transcription")
+ filename = _download_filename(job, "transcription", format)
+ if ephemeral:
+ background_tasks.add_task(_cleanup_download_path, download_path, True)
return FileResponse(download_path, media_type="application/octet-stream", filename=filename)
@router.get("/jobs/{job_id}/summary")
-async def download_summary(job_id: str):
- """Download do sumario."""
+async def download_summary(
+ job_id: str,
+ background_tasks: BackgroundTasks,
+ format: Optional[str] = Query(None, description="docx, txt, md, json ou pdf"),
+):
+ """Download do sumario no formato pedido."""
job = get_job_store().get_job(job_id)
if not job:
raise HTTPException(status_code=404, detail="Job nao encontrado")
- download_path = _ensure_download_file(job, "summary")
+ download_path, ephemeral = _resolve_download_path(job, "summary", format)
if not download_path:
raise HTTPException(status_code=404, detail="Sumario nao disponivel")
- filename = _download_filename(job, "summary")
+ filename = _download_filename(job, "summary", format)
+ if ephemeral:
+ background_tasks.add_task(_cleanup_download_path, download_path, True)
return FileResponse(download_path, media_type="application/octet-stream", filename=filename)
@router.get("/jobs/{job_id}/download")
-async def download_result(job_id: str):
+async def download_result(
+ job_id: str,
+ background_tasks: BackgroundTasks,
+ format: Optional[str] = Query(None, description="docx, txt, md, json ou pdf"),
+):
"""Download do artefato principal do job."""
job = get_job_store().get_job(job_id)
if not job:
raise HTTPException(status_code=404, detail="Job nao encontrado")
- download_path = _ensure_download_file(job, "result")
+ download_path, ephemeral = _resolve_download_path(job, "result", format)
if not download_path:
raise HTTPException(status_code=404, detail="Arquivo de resultado nao encontrado")
artifact_kind = "transcription" if job.get("mode") == "transcribe" else "summary"
- filename = _download_filename(job, artifact_kind)
+ filename = _download_filename(job, artifact_kind, format)
+ if ephemeral:
+ background_tasks.add_task(_cleanup_download_path, download_path, True)
return FileResponse(download_path, media_type="application/octet-stream", filename=filename)
@router.get("/jobs/{job_id}/download-bundle")
-async def download_bundle(job_id: str, background_tasks: BackgroundTasks):
- """Download ZIP com transcricao e sumario em ficheiros separados."""
+async def download_bundle(
+ job_id: str,
+ background_tasks: BackgroundTasks,
+ format: Optional[str] = Query(None, description="docx, txt, md, json ou pdf"),
+):
+ """Download ZIP com transcricao e sumario no formato pedido."""
job = get_job_store().get_job(job_id)
if not job:
raise HTTPException(status_code=404, detail="Job nao encontrado")
- tx_path, sm_path = _require_distinct_transcription_and_summary_paths(job)
+ tx_path, sm_path, tx_tmp, sm_tmp = _require_distinct_transcription_and_summary_paths(job, format)
fd, tmp_name = tempfile.mkstemp(suffix=".zip")
os.close(fd)
@@ -529,6 +614,10 @@ async def download_bundle(job_id: str, background_tasks: BackgroundTasks):
artifact_kind="result",
)
background_tasks.add_task(_unlink_quiet, str(tmp_path))
+ if tx_tmp:
+ background_tasks.add_task(_cleanup_download_path, tx_path, True)
+ if sm_tmp:
+ background_tasks.add_task(_cleanup_download_path, sm_path, True)
return FileResponse(str(tmp_path), media_type="application/zip", filename=zip_filename)
diff --git a/lazier/audio_processor.py b/lazier/audio_processor.py
@@ -202,6 +202,42 @@ def split_audio(audio_path: str, chunk_size_mb: int = 24) -> list[str]:
raise Exception(f"Erro ao dividir áudio: {str(e)}")
+def normalize_audio_for_stt(input_path: str) -> str:
+ """
+ Normaliza audio para STT: mono, 16 kHz, MP3 64 kbps.
+ Ficheiros menores = upload e transcricao mais rapidos.
+ """
+ if not check_ffmpeg():
+ return input_path
+
+ path_obj = Path(input_path)
+ output_path = path_obj.parent / f"{path_obj.stem}_stt.mp3"
+
+ cmd = [
+ "ffmpeg",
+ "-i",
+ input_path,
+ "-vn",
+ "-ac",
+ "1",
+ "-ar",
+ "16000",
+ "-acodec",
+ "libmp3lame",
+ "-b:a",
+ "64k",
+ "-y",
+ str(output_path),
+ ]
+ try:
+ subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
+ if output_path.exists():
+ return str(output_path)
+ except subprocess.CalledProcessError:
+ pass
+ return input_path
+
+
def prepare_audio_file(input_path: str, is_video: bool = False) -> str:
"""
Prepara arquivo de áudio para transcrição
@@ -215,10 +251,11 @@ def prepare_audio_file(input_path: str, is_video: bool = False) -> str:
Caminho do arquivo de áudio preparado
"""
if is_video:
- return extract_audio_from_video(input_path)
+ prepared = extract_audio_from_video(input_path)
else:
- # Tenta converter para mp3 se necessário (formato mais compatível)
audio_ext = Path(input_path).suffix.lower()
- if audio_ext not in ['.mp3', '.wav']:
- return convert_audio_format(input_path, 'mp3')
- return input_path
+ if audio_ext not in [".mp3", ".wav", ".m4a", ".ogg"]:
+ prepared = convert_audio_format(input_path, "mp3")
+ else:
+ prepared = input_path
+ return normalize_audio_for_stt(prepared)
diff --git a/lazier/cli.py b/lazier/cli.py
@@ -129,9 +129,10 @@ def cli(ctx, input_path):
if input_path:
console.print(
Panel.fit(
- "Fluxo unificado: transcrição em PT-BR + sumário (ajuste com LAZIER_ALWAYS_SUMMARY).\n\n"
- "`lazier process <input>` — fluxo unificado explícito.\n"
- "`lazier transcribe` / `lazier summarize` — modos legados (ver LAZIER_ALWAYS_SUMMARY).",
+ "Escolha o que gerar para economizar tokens:\n\n"
+ "`lazier transcribe <input>` — só transcrição em PT-BR\n"
+ "`lazier summarize <input>` — só sumário\n"
+ "`lazier process <input>` — transcrição + sumário",
title="Modo de Uso",
)
)
@@ -186,7 +187,7 @@ def transcribe(
chapters_flag: Optional[bool],
reasoning: Optional[str],
):
- """Converte/transcreve para PT-BR; com LAZIER_ALWAYS_SUMMARY=true também gera sumário."""
+ """Gera só a transcrição em PT-BR (sem sumário, salvo LAZIER_ALWAYS_SUMMARY=true)."""
_run_mode(
input_path=input_path,
mode="transcribe",
@@ -215,7 +216,7 @@ def summarize(
chapters_flag: Optional[bool],
reasoning: Optional[str],
):
- """Foco em sumário; com LAZIER_ALWAYS_SUMMARY=true também exporta transcrição completa."""
+ """Gera só o sumário em PT-BR (STT/extração internos; transcrição não é exportada)."""
_run_mode(
input_path=input_path,
mode="summarize",
@@ -244,7 +245,7 @@ def process(
chapters_flag: Optional[bool],
reasoning: Optional[str],
):
- """Executa o pipeline unificado (transcrição + sumário em PT-BR)."""
+ """Gera transcrição e sumário em PT-BR (maior consumo de tokens)."""
_run_mode(
input_path=input_path,
mode="process",
@@ -272,8 +273,12 @@ def config():
f"Smart summary: [cyan]{'on' if cfg.enable_smart_summary else 'off'}[/cyan]\n"
f"Capitulos: [cyan]{'on' if cfg.enable_chapters else 'off'}[/cyan]\n"
f"Preset qualidade: [cyan]{cfg.quality_preset}[/cyan]\n"
+ f"Polish PT-BR extra: [cyan]{'on' if cfg.enable_pt_polish else 'off'}[/cyan]\n"
+ f"Detectar tipo conteudo: [cyan]{'on' if cfg.detect_content_type else 'off'}[/cyan]\n"
+ f"STT paralelo (workers): [cyan]{cfg.stt_parallel_workers}[/cyan]\n"
+ f"Sumario paralelo (workers): [cyan]{cfg.summary_parallel_workers}[/cyan]\n"
f"Sumario hierarquico: [cyan]{'on' if cfg.hierarchical_summary else 'off'}[/cyan]\n"
- f"Sempre gerar sumario (LAZIER_ALWAYS_SUMMARY): [cyan]{'on' if cfg.always_summary else 'off'}[/cyan]\n"
+ f"Sempre gerar sumario (LAZIER_ALWAYS_SUMMARY): [cyan]{'on' if cfg.always_summary else 'off'}[/cyan] — override admin\n"
f"Diarizacao (LAZIER_DIARIZATION_PROVIDER): [cyan]{cfg.diarization_provider or 'none'}[/cyan]\n"
f"STT alternativo (flag): [cyan]{'on' if cfg.alt_stt_enabled else 'off'}[/cyan]",
title="Lazier - Config Atual",
diff --git a/lazier/core/config.py b/lazier/core/config.py
@@ -22,15 +22,13 @@ except ImportError: # pragma: no cover
load_dotenv()
-# Defaults shipados (Maio 2026):
-# - chat: gpt-5-mini -> equilibrio qualidade/custo
-# - transcribe: gpt-4o-mini-transcribe -> sucessor de whisper-1, mais preciso
-# - timestamps: whisper-1 -> ainda e a forma confiavel de obter `verbose_json`
-# com segments para construir capitulos.
-DEFAULT_CHAT_MODEL = "gpt-5-mini"
+# Defaults shipados (Maio 2026) — perfil economico: rapido e barato.
+# Sobrescreva via LAZIER_QUALITY_PRESET=maximo ou OPENAI_* no .env.
+DEFAULT_CHAT_MODEL = "gpt-4o-mini"
DEFAULT_TRANSCRIBE_MODEL = "gpt-4o-mini-transcribe"
DEFAULT_TRANSCRIBE_TIMESTAMPS_MODEL = "whisper-1"
-DEFAULT_REASONING_EFFORT = "medium"
+DEFAULT_REASONING_EFFORT = "minimal"
+DEFAULT_QUALITY_PRESET = "economico"
VALID_REASONING_EFFORTS = {"minimal", "low", "medium", "high"}
@@ -39,7 +37,7 @@ VALID_QUALITY_PRESETS = frozenset({"economico", "equilibrado", "maximo"})
# Presets: custo vs qualidade (sobrescritos por OPENAI_* quando definidos).
QUALITY_PRESETS: Dict[str, Dict[str, str]] = {
"economico": {
- "chat_model": "gpt-5-nano",
+ "chat_model": "gpt-4o-mini",
"transcribe_model": "gpt-4o-mini-transcribe",
"reasoning_effort": "minimal",
},
@@ -57,8 +55,8 @@ QUALITY_PRESETS: Dict[str, Dict[str, str]] = {
def resolve_quality_preset(name: Optional[str]) -> str:
- key = (name or "equilibrado").strip().lower()
- return key if key in VALID_QUALITY_PRESETS else "equilibrado"
+ key = (name or DEFAULT_QUALITY_PRESET).strip().lower()
+ return key if key in VALID_QUALITY_PRESETS else DEFAULT_QUALITY_PRESET
def get_preset_model_defaults(preset: Optional[str]) -> Dict[str, str]:
@@ -98,17 +96,21 @@ class ModelConfig:
chat_model: str = DEFAULT_CHAT_MODEL
transcribe_model: str = DEFAULT_TRANSCRIBE_MODEL
transcribe_timestamps_model: str = DEFAULT_TRANSCRIBE_TIMESTAMPS_MODEL
- enable_chapters: bool = True
+ enable_chapters: bool = False
enable_smart_summary: bool = True
reasoning_effort: str = DEFAULT_REASONING_EFFORT
- quality_preset: str = "equilibrado"
+ quality_preset: str = DEFAULT_QUALITY_PRESET
hierarchical_summary: bool = True
- summary_direct_max_chars: int = 32_000
- summary_map_chunk_chars: int = 14_000
- summary_chunk_overlap_chars: int = 1_400
+ summary_direct_max_chars: int = 48_000
+ summary_map_chunk_chars: int = 16_000
+ summary_chunk_overlap_chars: int = 800
alt_stt_enabled: bool = False
- always_summary: bool = True
+ always_summary: bool = False
diarization_provider: str = "none"
+ enable_pt_polish: bool = False
+ detect_content_type: bool = False
+ stt_parallel_workers: int = 3
+ summary_parallel_workers: int = 3
def supports_reasoning(self, model: Optional[str] = None) -> bool:
"""Indica se devemos enviar `reasoning_effort` para um modelo."""
@@ -133,7 +135,7 @@ def get_model_config(refresh: bool = False) -> ModelConfig:
if _cached_config is not None and not refresh:
return _cached_config
- preset_key = resolve_quality_preset(_env_str("LAZIER_QUALITY_PRESET", "equilibrado"))
+ preset_key = resolve_quality_preset(_env_str("LAZIER_QUALITY_PRESET", DEFAULT_QUALITY_PRESET))
pd = QUALITY_PRESETS[preset_key]
reasoning = _env_str("OPENAI_REASONING_EFFORT", pd["reasoning_effort"]).lower()
@@ -146,17 +148,21 @@ def get_model_config(refresh: bool = False) -> ModelConfig:
transcribe_timestamps_model=_env_str(
"OPENAI_TRANSCRIBE_TIMESTAMPS_MODEL", DEFAULT_TRANSCRIBE_TIMESTAMPS_MODEL
),
- enable_chapters=_env_bool("OPENAI_ENABLE_CHAPTERS", True),
+ enable_chapters=_env_bool("OPENAI_ENABLE_CHAPTERS", False),
enable_smart_summary=_env_bool("OPENAI_ENABLE_SMART_SUMMARY", True),
reasoning_effort=reasoning,
quality_preset=preset_key,
hierarchical_summary=_env_bool("LAZIER_SUMMARY_HIERARCHICAL", True),
- summary_direct_max_chars=_env_int("LAZIER_SUMMARY_DIRECT_MAX_CHARS", 32_000, min_value=2_000),
- summary_map_chunk_chars=_env_int("LAZIER_SUMMARY_MAP_CHUNK_CHARS", 14_000, min_value=2_000),
- summary_chunk_overlap_chars=_env_int("LAZIER_SUMMARY_CHUNK_OVERLAP_CHARS", 1_400, min_value=0),
+ summary_direct_max_chars=_env_int("LAZIER_SUMMARY_DIRECT_MAX_CHARS", 48_000, min_value=2_000),
+ summary_map_chunk_chars=_env_int("LAZIER_SUMMARY_MAP_CHUNK_CHARS", 16_000, min_value=2_000),
+ summary_chunk_overlap_chars=_env_int("LAZIER_SUMMARY_CHUNK_OVERLAP_CHARS", 800, min_value=0),
alt_stt_enabled=_env_bool("LAZIER_ALT_STT_ENABLED", False),
- always_summary=_env_bool("LAZIER_ALWAYS_SUMMARY", True),
+ always_summary=_env_bool("LAZIER_ALWAYS_SUMMARY", False),
diarization_provider=_env_str("LAZIER_DIARIZATION_PROVIDER", "none").lower(),
+ enable_pt_polish=_env_bool("LAZIER_ENABLE_PT_POLISH", False),
+ detect_content_type=_env_bool("LAZIER_DETECT_CONTENT_TYPE", False),
+ stt_parallel_workers=_env_int("LAZIER_STT_PARALLEL_WORKERS", 3, min_value=1),
+ summary_parallel_workers=_env_int("LAZIER_SUMMARY_PARALLEL_WORKERS", 3, min_value=1),
)
return _cached_config
diff --git a/lazier/core/processing.py b/lazier/core/processing.py
@@ -8,7 +8,7 @@ Inclui as etapas de:
- Diarizacao opcional (LAZIER_DIARIZATION_PROVIDER) antes da conversao para PT-BR
- Conversao para PT-BR + revisao ortografica leve (polish_pt_br_text)
- Deteccao de tipo de conteudo
-- Sumario (legado ou estruturado; LAZIER_ALWAYS_SUMMARY=false recupera fluxos antigos)
+- Sumario (legado ou estruturado; LAZIER_ALWAYS_SUMMARY forca sumario mesmo em transcribe)
- Capitulos com timestamps
- Export final (dois artefactos quando transcrição + sumário e saida por job)
"""
@@ -98,7 +98,25 @@ def _ensure_mode(mode: str) -> str:
return mode
-def _maybe_polish_pt_br(text: Optional[str], kind: str, runtime: Dict[str, Any]) -> Optional[str]:
+def _wants_summary(mode: str, always_summary: bool) -> bool:
+    """Tell whether a summary has to be produced for this run.
+
+    The ``summarize`` and ``process`` modes always want one; for any other
+    mode the ``always_summary`` flag decides.
+    """
+    if mode in {"summarize", "process"}:
+        return True
+    return always_summary
+
+
+def _wants_transcription_output(mode: str) -> bool:
+    """Tell whether the transcription is one of the outputs of ``mode``.
+
+    Only the ``transcribe`` and ``process`` modes deliver a transcription.
+    """
+    transcription_modes = {"transcribe", "process"}
+    return mode in transcription_modes
+
+
+def _wants_summary_output(mode: str) -> bool:
+    """Tell whether the summary is one of the outputs of ``mode``.
+
+    Only the ``summarize`` and ``process`` modes deliver a summary.
+
+    NOTE(review): unlike ``_wants_summary`` this check does not look at the
+    ``always_summary`` flag, so a summary forced by that flag in
+    ``transcribe`` mode appears to be built and then left out of the
+    exported result -- confirm this is intended.
+    """
+    summary_modes = {"summarize", "process"}
+    return mode in summary_modes
+
+
+def _maybe_polish_pt_br(
+ text: Optional[str],
+ kind: str,
+ runtime: Dict[str, Any],
+) -> Optional[str]:
+ if not runtime.get("pt_polish_enabled", False):
+ return text
if not text or not str(text).strip():
return text
try:
@@ -136,6 +154,8 @@ def _resolve_runtime(
"reasoning_effort": config.reasoning_effort,
"quality_preset": config.quality_preset,
"always_summary": config.always_summary,
+ "pt_polish_enabled": config.enable_pt_polish,
+ "detect_content_type": config.detect_content_type,
}
@@ -267,6 +287,9 @@ def _detect_and_attach_content_type(
return None
if metadata.get("content_type"):
return metadata["content_type"]
+ if not runtime.get("detect_content_type", False):
+ metadata.setdefault("content_type", "other")
+ return metadata["content_type"]
detection = detect_content_type(text, metadata=metadata, model=runtime["chat_model"])
metadata["content_type"] = detection.get("content_type", "other")
metadata["content_type_confidence"] = detection.get("confidence", 0.0)
@@ -597,10 +620,16 @@ def process_source(
metadata = {**metadata, "webpage_url": source}
# ---- Etapas pos-transcricao comuns a todos os tipos ----
- if portuguese_text:
+ always_sum = runtime["always_summary"]
+ wants_summary = _wants_summary(mode, always_sum)
+
+ if not wants_summary and summary:
+ summary = None
+
+ if portuguese_text and _wants_transcription_output(mode):
portuguese_text = _maybe_polish_pt_br(portuguese_text, "transcription", runtime) or ""
- if portuguese_text:
+ if portuguese_text and wants_summary:
_detect_and_attach_content_type(portuguese_text, metadata, runtime)
chapters = _build_chapters_if_possible(
@@ -610,11 +639,8 @@ def process_source(
progress_callback=progress_callback,
)
- always_sum = runtime["always_summary"]
text_for_summary = (portuguese_text or "").strip()
- should_build_summary = bool(text_for_summary) and (
- always_sum or mode in {"summarize", "process"}
- )
+ should_build_summary = bool(text_for_summary) and wants_summary
if should_build_summary and not summary:
summary = _build_summary(
@@ -636,7 +662,7 @@ def process_source(
"open_questions": [],
}
- if summary:
+ if summary and _wants_summary_output(mode):
summary = _maybe_polish_pt_br(summary, "summary", runtime)
# ---- Atualiza caches "ricos" para fontes de midia ----
@@ -683,14 +709,9 @@ def process_source(
resolved_source_name = source_name or metadata.get("title") or source
- has_tx = bool(portuguese_text and str(portuguese_text).strip())
- has_sm = bool(summary and str(summary).strip())
- dual_artifacts = (
- not output_path
- and has_tx
- and has_sm
- and (runtime["always_summary"] or mode == "process")
- )
+ has_tx = bool(portuguese_text and str(portuguese_text).strip()) and _wants_transcription_output(mode)
+ has_sm = bool(summary and str(summary).strip()) and _wants_summary_output(mode)
+ dual_artifacts = not output_path and has_tx and has_sm and mode == "process"
_notify(progress_callback, 92, "processing", f"Gerando arquivo {output_format.upper()}...")
@@ -799,16 +820,18 @@ def process_source(
raise Exception("Nenhum conteudo gerado para exportar.")
output_dir = str(Path(result_path or ".").parent)
+ result_transcription = portuguese_text if _wants_transcription_output(mode) else None
+ result_summary = summary if _wants_summary_output(mode) else None
result = {
"mode": mode,
"input_type": input_type,
"source_name": resolved_source_name,
"metadata": metadata,
- "transcription": portuguese_text,
- "summary": summary,
- "smart_summary": metadata.get("smart_summary"),
- "chapters": chapters,
- "content_type": metadata.get("content_type"),
+ "transcription": result_transcription,
+ "summary": result_summary,
+ "smart_summary": metadata.get("smart_summary") if _wants_summary_output(mode) else None,
+ "chapters": chapters if _wants_transcription_output(mode) or _wants_summary_output(mode) else [],
+ "content_type": metadata.get("content_type") if wants_summary else None,
"result_path": result_path,
"transcription_path": transcription_path,
"summary_path": summary_path,
diff --git a/lazier/summarizer.py b/lazier/summarizer.py
@@ -16,7 +16,8 @@ from __future__ import annotations
import json
import os
-from typing import Any, Dict, List, Optional
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from typing import Any, Dict, List, Optional, Tuple
try:
from openai import OpenAI
@@ -535,21 +536,14 @@ def summarize_smart(
language,
reasoning_effort=reasoning_effort,
).model_dump()
- parciais_h: List[SmartSummary] = []
- for index, chunk in enumerate(chunks):
- print(f"Sumario inteligente parte {index + 1}/{len(chunks)}...")
- try:
- parciais_h.append(
- _summarize_smart_chunk(
- chunk,
- chosen_model,
- content_type,
- language,
- reasoning_effort=reasoning_effort,
- )
- )
- except Exception as exc:
- print(f"Erro ao sumarizar parte {index + 1}: {exc}")
+ parciais_h = _collect_smart_partials(
+ chunks,
+ chosen_model,
+ content_type,
+ language,
+ reasoning_effort,
+ label="Sumario inteligente hierarquico",
+ )
if not parciais_h:
return SmartSummary(tldr="Falha ao gerar sumario: todas as partes falharam.").model_dump()
if len(parciais_h) == 1:
@@ -573,21 +567,14 @@ def summarize_smart(
).model_dump()
print(f"Texto longo detectado ({len(text)} chars) - rodando sumario inteligente em {len(chunks)} partes.")
- parciais: List[SmartSummary] = []
- for index, chunk in enumerate(chunks):
- print(f"Sumario inteligente parte {index + 1}/{len(chunks)}...")
- try:
- parciais.append(
- _summarize_smart_chunk(
- chunk,
- chosen_model,
- content_type,
- language,
- reasoning_effort=reasoning_effort,
- )
- )
- except Exception as exc:
- print(f"Erro ao sumarizar parte {index + 1}: {exc}")
+ parciais = _collect_smart_partials(
+ chunks,
+ chosen_model,
+ content_type,
+ language,
+ reasoning_effort,
+ label="Sumario inteligente",
+ )
if not parciais:
return SmartSummary(tldr="Falha ao gerar sumario: todas as partes falharam.").model_dump()
@@ -674,6 +661,52 @@ def _summarize_smart_chunk(
return _summarize_smart_fallback_json(client, model, messages, reasoning_effort=reasoning_effort)
+def _collect_smart_partials(
+    chunks: List[str],
+    chosen_model: str,
+    content_type: Optional[str],
+    language: str,
+    reasoning_effort: Optional[str],
+    *,
+    label: str = "Sumario inteligente",
+) -> List[SmartSummary]:
+    """Map every chunk to a SmartSummary, in parallel when configured.
+
+    The number of worker threads is ``summary_parallel_workers`` from the
+    model config (env ``LAZIER_SUMMARY_PARALLEL_WORKERS``), capped at the
+    number of chunks. With a single chunk, or a single worker, the chunks
+    are processed sequentially on the calling thread.
+
+    Args:
+        chunks: Text pieces to summarise, in document order.
+        chosen_model: Chat model forwarded to ``_summarize_smart_chunk``.
+        content_type: Content type hint forwarded to the chunk summariser.
+        language: Output language forwarded to the chunk summariser.
+        reasoning_effort: Reasoning effort forwarded to the chunk summariser.
+        label: Prefix of the progress line printed for each chunk.
+
+    Returns:
+        The partial summaries of the chunks that succeeded, in chunk order.
+        A chunk whose summarisation raises is reported on stdout and left
+        out, so the list can be shorter than ``chunks`` (or empty).
+    """
+    config = get_model_config()
+    total = len(chunks)
+    workers = min(config.summary_parallel_workers, total) if total > 1 else 1
+
+    def _one(index: int, chunk: str) -> Tuple[int, Optional[SmartSummary]]:
+        # Never raises for ordinary errors: a failure becomes (index, None).
+        print(f"{label} parte {index + 1}/{total}...")
+        try:
+            return index, _summarize_smart_chunk(
+                chunk,
+                chosen_model,
+                content_type,
+                language,
+                reasoning_effort=reasoning_effort,
+            )
+        except Exception as exc:
+            print(f"Erro ao sumarizar parte {index + 1}: {exc}")
+            return index, None
+
+    # Slots are indexed by chunk position so the output order does not
+    # depend on which worker finishes first.
+    slots: List[Optional[SmartSummary]] = [None] * total
+    if workers <= 1:
+        for index, chunk in enumerate(chunks):
+            slots[index] = _one(index, chunk)[1]
+    else:
+        with ThreadPoolExecutor(max_workers=workers) as pool:
+            futures = [pool.submit(_one, index, chunk) for index, chunk in enumerate(chunks)]
+            for future in as_completed(futures):
+                idx, partial = future.result()
+                slots[idx] = partial
+
+    # ``None`` is the failure sentinel; test for it explicitly instead of
+    # relying on the truthiness of the summary object.
+    return [item for item in slots if item is not None]
+
+
def _summarize_smart_fallback_json(
client: "OpenAI",
model: str,
diff --git a/lazier/transcriber.py b/lazier/transcriber.py
@@ -11,13 +11,15 @@ Suporta dois modos:
forma mais confiavel de obter `verbose_json`.
Ambos lidam com chunking automatico para arquivos > 24MB (limite da API e ~25MB).
+Chunks podem ser transcritos em paralelo (LAZIER_STT_PARALLEL_WORKERS).
"""
from __future__ import annotations
import os
+from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Tuple
try:
from openai import OpenAI
@@ -32,7 +34,7 @@ except ImportError: # pragma: no cover - ambiente sem python-dotenv
load_dotenv()
-from .audio_processor import split_audio
+from .audio_processor import get_audio_duration, split_audio
from .core.config import get_model_config
@@ -69,6 +71,64 @@ def _resolve_chunks(audio_path: str) -> List[str]:
return split_audio(audio_path, chunk_size_mb=24)
+def _transcribe_plain_chunk(
+    client: "OpenAI",
+    chunk_path: str,
+    chosen_model: str,
+    language: Optional[str],
+    index: int,
+    total: int,
+) -> Tuple[int, str]:
+    """Transcribe one audio chunk as plain text.
+
+    Opens ``chunk_path`` in binary mode and sends it through
+    ``client.audio.transcriptions.create`` with ``response_format="text"``;
+    ``language`` is only sent when it is truthy. A progress line is printed
+    when there is more than one chunk.
+
+    Returns:
+        ``(index, text)`` where ``text`` is the stripped transcript (an
+        empty string when the response carries no text). ``index`` is
+        echoed back so callers can restore chunk order.
+    """
+    if total > 1:
+        print(f"Processando chunk {index + 1}/{total}...")
+
+    optional_args: Dict[str, Any] = {"language": language} if language else {}
+    with open(chunk_path, "rb") as handle:
+        reply = client.audio.transcriptions.create(
+            model=chosen_model,
+            file=handle,
+            response_format="text",
+            **optional_args,
+        )
+
+    # The response may be an object exposing ``.text`` or a bare string.
+    try:
+        raw_text = reply.text
+    except AttributeError:
+        raw_text = reply if isinstance(reply, str) else str(reply)
+    return index, (raw_text or "").strip()
+
+
+def _transcribe_verbose_chunk(
+    client: "OpenAI",
+    chunk_path: str,
+    chosen_model: str,
+    language: Optional[str],
+    index: int,
+    total: int,
+) -> Tuple[int, Dict[str, Any]]:
+    """Transcribe one audio chunk requesting ``verbose_json`` output.
+
+    Opens ``chunk_path`` in binary mode and sends it through
+    ``client.audio.transcriptions.create``; ``language`` is only sent when
+    it is truthy. A progress line is printed when there is more than one
+    chunk.
+
+    Returns:
+        ``(index, payload)`` where ``payload`` is the API response after
+        ``_normalize_verbose_response``. ``index`` is echoed back so callers
+        can restore chunk order.
+    """
+    if total > 1:
+        print(f"Processando chunk {index + 1}/{total} (com timestamps)...")
+
+    optional_args: Dict[str, Any] = {"language": language} if language else {}
+    with open(chunk_path, "rb") as handle:
+        raw_response = client.audio.transcriptions.create(
+            model=chosen_model,
+            file=handle,
+            response_format="verbose_json",
+            **optional_args,
+        )
+    return index, _normalize_verbose_response(raw_response)
+
+
+def _parallel_workers(total_chunks: int) -> int:
+    """Return how many worker threads to use for ``total_chunks`` chunks.
+
+    One chunk (or none) always yields 1; otherwise the configured
+    ``stt_parallel_workers`` is used, capped at the number of chunks.
+    """
+    settings = get_model_config()
+    return min(settings.stt_parallel_workers, total_chunks) if total_chunks > 1 else 1
+
+
def transcribe_audio(
audio_path: str,
language: Optional[str] = None,
@@ -85,30 +145,36 @@ def transcribe_audio(
try:
chunks = _resolve_chunks(audio_path)
- transcriptions: List[str] = []
total = len(chunks)
- for index, chunk_path in enumerate(chunks):
- if total > 1:
- print(f"Processando chunk {index + 1}/{total}...")
- with open(chunk_path, "rb") as audio_file:
- request_kwargs: Dict[str, Any] = {
- "model": chosen_model,
- "file": audio_file,
- "response_format": "text",
- }
- if language:
- request_kwargs["language"] = language
- transcript = client.audio.transcriptions.create(**request_kwargs)
-
- if hasattr(transcript, "text"):
- text = transcript.text
- elif isinstance(transcript, str):
- text = transcript
- else:
- text = str(transcript)
- transcriptions.append((text or "").strip())
-
- return " ".join(part for part in transcriptions if part)
+ workers = _parallel_workers(total)
+
+ if workers <= 1:
+ transcriptions: List[str] = []
+ for index, chunk_path in enumerate(chunks):
+ _, text = _transcribe_plain_chunk(
+ client, chunk_path, chosen_model, language, index, total
+ )
+ transcriptions.append(text)
+ return " ".join(part for part in transcriptions if part)
+
+ ordered: List[str] = [""] * total
+ with ThreadPoolExecutor(max_workers=workers) as pool:
+ futures = [
+ pool.submit(
+ _transcribe_plain_chunk,
+ client,
+ chunk_path,
+ chosen_model,
+ language,
+ index,
+ total,
+ )
+ for index, chunk_path in enumerate(chunks)
+ ]
+ for future in as_completed(futures):
+ idx, text = future.result()
+ ordered[idx] = text
+ return " ".join(part for part in ordered if part)
except Exception as exc:
raise _wrap_openai_error(exc) from exc
@@ -118,18 +184,7 @@ def transcribe_audio_with_timestamps(
language: Optional[str] = None,
model: Optional[str] = None,
) -> Dict[str, Any]:
- """Transcreve com `verbose_json` retornando texto e segmentos com timestamps.
-
- Returns:
- {
- "text": str, # texto completo concatenado
- "segments": [ # lista ordenada com tempos absolutos
- {"start": float, "end": float, "text": str},
- ...
- ],
- "duration": float | None, # duracao total estimada (s)
- }
- """
+ """Transcreve com `verbose_json` retornando texto e segmentos com timestamps."""
if not os.path.exists(audio_path):
raise FileNotFoundError(f"Arquivo de audio nao encontrado: {audio_path}")
@@ -140,33 +195,58 @@ def transcribe_audio_with_timestamps(
try:
chunks = _resolve_chunks(audio_path)
+ total = len(chunks)
+ workers = _parallel_workers(total)
+
+ offsets: List[float] = []
+ acc = 0.0
+ for chunk_path in chunks:
+ offsets.append(acc)
+ duration = get_audio_duration(chunk_path)
+ acc += duration if duration > 0 else 0.0
+
+ if workers <= 1:
+ payloads = []
+ for index, chunk_path in enumerate(chunks):
+ _, payload = _transcribe_verbose_chunk(
+ client, chunk_path, chosen_model, language, index, total
+ )
+ payloads.append(payload)
+ else:
+ payloads = [None] * total # type: ignore[list-item]
+ with ThreadPoolExecutor(max_workers=workers) as pool:
+ futures = [
+ pool.submit(
+ _transcribe_verbose_chunk,
+ client,
+ chunk_path,
+ chosen_model,
+ language,
+ index,
+ total,
+ )
+ for index, chunk_path in enumerate(chunks)
+ ]
+ for future in as_completed(futures):
+ idx, payload = future.result()
+ payloads[idx] = payload
+
all_segments: List[Dict[str, Any]] = []
all_text_parts: List[str] = []
offset_seconds = 0.0
- total = len(chunks)
- for index, chunk_path in enumerate(chunks):
- if total > 1:
- print(f"Processando chunk {index + 1}/{total} (com timestamps)...")
- with open(chunk_path, "rb") as audio_file:
- request_kwargs: Dict[str, Any] = {
- "model": chosen_model,
- "file": audio_file,
- "response_format": "verbose_json",
- }
- if language:
- request_kwargs["language"] = language
- response = client.audio.transcriptions.create(**request_kwargs)
-
- payload = _normalize_verbose_response(response)
+ for index, payload in enumerate(payloads):
+ if not payload:
+ continue
+ chunk_offset = offsets[index] if index < len(offsets) else offset_seconds
chunk_duration = payload.get("duration")
chunk_segments = payload.get("segments") or []
for seg in chunk_segments:
all_segments.append(
{
- "start": float(seg.get("start", 0.0)) + offset_seconds,
- "end": float(seg.get("end", 0.0)) + offset_seconds,
+ "start": float(seg.get("start", 0.0)) + chunk_offset,
+ "end": float(seg.get("end", 0.0)) + chunk_offset,
"text": (seg.get("text") or "").strip(),
}
)
@@ -175,10 +255,9 @@ def transcribe_audio_with_timestamps(
if chunk_text:
all_text_parts.append(chunk_text)
- if isinstance(chunk_duration, (int, float)):
- offset_seconds += float(chunk_duration)
+ if isinstance(chunk_duration, (int, float)) and float(chunk_duration) > 0:
+ offset_seconds = max(offset_seconds, chunk_offset + float(chunk_duration))
elif chunk_segments:
- # Fallback: usa o final do ultimo segmento como nova base.
offset_seconds = max(seg["end"] for seg in all_segments)
return {
@@ -196,7 +275,6 @@ def _normalize_verbose_response(response: Any) -> Dict[str, Any]:
return response.model_dump()
if isinstance(response, dict):
return response
- # ultimo recurso: tenta acessar atributos diretamente
return {
"text": getattr(response, "text", ""),
"segments": getattr(response, "segments", []) or [],
diff --git a/lazier/web/templates/index.html b/lazier/web/templates/index.html
@@ -116,7 +116,7 @@
cursor: pointer;
}
- .mode-grid { display: grid; grid-template-columns: 1fr 1fr; gap: 8px; }
+ .mode-grid { display: grid; grid-template-columns: repeat(3, 1fr); gap: 8px; }
.mode {
border: 1px solid var(--border);
border-radius: var(--radius);
@@ -195,7 +195,18 @@
.bar { height: 4px; background: var(--border); border-radius: 2px; overflow: hidden; margin-top: 12px; }
.bar span { display: block; height: 100%; background: var(--accent); }
- .actions { display: flex; flex-wrap: wrap; gap: 8px; margin-top: 12px; }
+ .actions { display: flex; flex-direction: column; gap: 10px; margin-top: 12px; }
+ .download-row { display: flex; flex-wrap: wrap; align-items: center; gap: 6px; }
+ .download-label { font-size: 0.75rem; font-weight: 500; color: var(--muted); min-width: 88px; }
+ .download-formats { display: flex; flex-wrap: wrap; gap: 4px; }
+ .action.fmt {
+ padding: 6px 10px;
+ min-height: 32px;
+ font-size: 0.75rem;
+ font-weight: 600;
+ letter-spacing: 0.02em;
+ }
+ .action-row-inline { display: flex; flex-wrap: wrap; gap: 8px; }
.action {
font-size: 0.8125rem;
padding: 8px 12px;
@@ -329,17 +340,26 @@
<section id="page-process" class="page active">
<div class="section-title">Novo processamento</div>
- <p class="section-lead">Gera transcrição completa e sumário em PT-BR; escolha no download o que quer guardar.</p>
+ <p class="section-lead">Escolha o que gerar; transcrição e sumário juntos consomem mais tokens da API.</p>
<p class="subtle" style="margin-top:-8px;">Diarização de falantes é opcional (LAZIER_DIARIZATION_PROVIDER); sem ela, o texto não traz separação por falante.</p>
<div class="stack">
<div class="upload" id="uploadArea"><input type="file" id="fileInput" hidden multiple accept="audio/*,video/*,application/pdf,text/plain,text/markdown,text/html,.md,.htm,.txt"></div>
<div class="field"><label for="urlInput">URL (opcional)</label><input type="text" id="urlInput" placeholder="YouTube, página, áudio em linha…"></div>
+ <div class="field">
+ <label>Saída</label>
+ <div class="mode-grid">
+ <div class="mode selected" onclick="selectMode('transcribe',this)"><input type="radio" checked value="transcribe"><strong>Transcrição</strong><span class="meta">Texto completo em PT-BR.</span></div>
+ <div class="mode" onclick="selectMode('summarize',this)"><input type="radio" value="summarize"><strong>Sumário</strong><span class="meta">Síntese em PT-BR.</span></div>
+ <div class="mode" onclick="selectMode('process',this)"><input type="radio" value="process"><strong>Ambos</strong><span class="meta">Transcrição + sumário.</span></div>
+ </div>
+ </div>
<div class="row-actions">
<div class="field">
- <label for="formatSelect">Formato</label>
+ <label for="formatSelect">Formato principal</label>
<select id="formatSelect"><option value="docx">DOCX</option><option value="txt">TXT</option><option value="md">Markdown</option><option value="json">JSON</option><option value="pdf">PDF</option></select>
</div>
+ <p class="subtle" style="margin:0; flex:1 1 100%;">Após concluir, pode descarregar em qualquer formato (DOCX a PDF).</p>
<button class="btn" id="processBtn" onclick="processFiles()">Processar</button>
</div>
</div>
@@ -383,9 +403,16 @@
<script>
let selectedFiles = [];
- let processingMode = 'process';
+ let processingMode = 'transcribe';
let allJobs = [];
let currentFilter = 'all';
+ const EXPORT_FORMATS = [
+ { id: 'docx', label: 'DOCX' },
+ { id: 'txt', label: 'TXT' },
+ { id: 'md', label: 'MD' },
+ { id: 'json', label: 'JSON' },
+ { id: 'pdf', label: 'PDF' },
+ ];
document.addEventListener('DOMContentLoaded', () => {
document.getElementById('currentYear').textContent = new Date().getFullYear();
@@ -414,6 +441,12 @@
}
function removeFile(index) { selectedFiles.splice(index, 1); document.getElementById('fileInput').value = ''; renderUpload(); }
+    // Makes `mode` the active processing mode and marks `element` (the
+    // clicked .mode card) as the selected one.
+    // The radio inputs inside the cards carry no `name`, so the browser does
+    // not treat them as one group; their `checked` state is synchronised
+    // here by hand so it always matches `processingMode`.
+    function selectMode(mode, element) {
+      processingMode = mode;
+      document.querySelectorAll('.mode').forEach((node) => {
+        node.classList.remove('selected');
+        const radio = node.querySelector('input[type="radio"]');
+        if (radio) radio.checked = false;
+      });
+      element.classList.add('selected');
+      const chosenRadio = element.querySelector('input[type="radio"]');
+      if (chosenRadio) chosenRadio.checked = true;
+    }
+
function showPage(page) {
document.querySelectorAll('.page').forEach((node) => node.classList.remove('active'));
document.querySelectorAll('.nav a').forEach((node) => node.classList.remove('active'));
@@ -468,29 +501,34 @@
return `<div class="job-head"><div><div class="job-title">${escapeHtml(title)}</div><div class="chips">${modeChip ? `<span class="chip">${modeChip}</span>` : ''}${job.format ? `<span class="chip">${job.format.toUpperCase()}</span>` : ''}${job.created_at ? `<span class="chip">${new Date(job.created_at).toLocaleString('pt-BR')}</span>` : ''}</div></div><span class="status ${job.status}">${statusLabel}</span></div><div class="bar"><span style="width:${job.progress || 0}%"></span></div>${job.error ? `<div class="error">${escapeHtml(job.error)}</div>` : ''}${renderActions(job)}`;
}
+    // Builds one download link per entry of EXPORT_FORMATS for the given job
+    // artifact and returns them concatenated as a single HTML string.
+    // Any `artifact` other than 'transcription' is labelled as the summary.
+    function formatDownloadButtons(jobId, artifact) {
+      const noun = artifact === 'transcription' ? 'transcrição' : 'sumário';
+      const links = [];
+      for (const fmt of EXPORT_FORMATS) {
+        links.push(`<a class="action fmt" href="/api/jobs/${jobId}/${artifact}?format=${fmt.id}" title="Descarregar ${noun} em ${fmt.label}">${fmt.label}</a>`);
+      }
+      return links.join('');
+    }
+
function renderActions(job) {
if (job.status !== 'completed' && job.status !== 'interrupted') return '';
const id = job.id;
let html = '<div class="actions">';
- const hrefTx = `/api/jobs/${id}/transcription`;
- const hrefSum = `/api/jobs/${id}/summary`;
- const hrefDl = `/api/jobs/${id}/download`;
- const hrefBundle = `/api/jobs/${id}/download-bundle`;
if (job.has_transcription) {
- html += `<a class="action" href="${hrefTx}">Transcrição</a>`;
+ html += `<div class="download-row"><span class="download-label">Transcrição</span><div class="download-formats">${formatDownloadButtons(id, 'transcription')}</div></div>`;
}
if (job.has_summary) {
- html += `<a class="action" href="${hrefSum}">Sumário</a>`;
+ html += `<div class="download-row"><span class="download-label">Sumário</span><div class="download-formats">${formatDownloadButtons(id, 'summary')}</div></div>`;
}
if (job.has_transcription && job.has_summary) {
- html += `<a class="action" href="${hrefBundle}">Pacote ZIP</a>`;
+ html += `<div class="download-row"><span class="download-label">Pacote</span><div class="download-formats">${EXPORT_FORMATS.map((fmt) =>
+ `<a class="action fmt" href="/api/jobs/${id}/download-bundle?format=${fmt.id}">ZIP ${fmt.label}</a>`
+ ).join('')}</div></div>`;
}
if (!job.has_transcription && !job.has_summary && job.result_path) {
- html += `<a class="action" href="${hrefDl}">Download</a>`;
+ const artifact = job.mode === 'summarize' ? 'summary' : 'transcription';
+ html += `<div class="download-row"><span class="download-label">Download</span><div class="download-formats">${formatDownloadButtons(id, artifact)}</div></div>`;
}
- html += `<button type="button" class="action" onclick="viewJobDetails('${id}')">Visualizar</button></div>`;
+ html += `<div class="action-row-inline"><button type="button" class="action" onclick="viewJobDetails('${id}')">Visualizar</button></div></div>`;
return html;
}
diff --git a/tests/test_api.py b/tests/test_api.py
@@ -34,6 +34,15 @@ class ApiTests(unittest.TestCase):
def tearDown(self):
shutil.rmtree(self.temp_dir, ignore_errors=True)
+    def test_process_defaults_to_transcribe_without_mode(self):
+        """A request that names no mode is answered with mode ``transcribe``."""
+        payload = {"url": "https://example.com/page", "format": "txt"}
+        reply = self.client.post("/api/process", json=payload)
+
+        self.assertEqual(reply.status_code, 200)
+        body = reply.json()
+        self.assertEqual(body.get("mode"), "transcribe")
+
def test_process_unified_when_both_legacy_flags(self):
response = self.client.post(
"/api/process",
@@ -43,6 +52,20 @@ class ApiTests(unittest.TestCase):
self.assertEqual(response.status_code, 200)
self.assertEqual(response.json().get("mode"), "process")
+    def test_process_rejects_both_flags_false(self):
+        """Turning off both legacy flags yields HTTP 400 with an "Escolha" hint."""
+        payload = {
+            "url": "https://example.com/page",
+            "format": "txt",
+            "transcribe": False,
+            "summarize": False,
+        }
+        reply = self.client.post("/api/process", json=payload)
+
+        self.assertEqual(reply.status_code, 400)
+        detail = reply.json()["detail"]
+        self.assertIn("Escolha", detail)
+
def test_upload_persists_job_and_downloads_from_store(self):
output_dir = Path(os.environ["LAZIER_OUTPUT_DIR"]) / "2026" / "03" / "31" / "sample-job"
output_dir.mkdir(parents=True, exist_ok=True)
diff --git a/tests/test_processing.py b/tests/test_processing.py
@@ -16,7 +16,7 @@ class ProcessingTests(unittest.TestCase):
os.environ["OPENAI_API_KEY"] = "test-key"
os.environ["LAZIER_OUTPUT_DIR"] = str(self.temp_dir)
os.environ["LAZIER_DATA_DIR"] = str(self.temp_dir)
- # Garantir defaults previsiveis para os testes legados.
+ os.environ["LAZIER_ALWAYS_SUMMARY"] = "false"
os.environ.pop("OPENAI_ENABLE_SMART_SUMMARY", None)
os.environ.pop("OPENAI_ENABLE_CHAPTERS", None)
reset_model_config_cache()
@@ -25,7 +25,7 @@ class ProcessingTests(unittest.TestCase):
shutil.rmtree(self.temp_dir, ignore_errors=True)
reset_model_config_cache()
- def test_audio_transcribe_generates_portuguese_transcription_file(self):
+ def test_audio_transcribe_skips_summary(self):
audio_path = self.temp_dir / "sample.mp3"
audio_path.write_bytes(b"fake-audio")
@@ -38,10 +38,10 @@ class ProcessingTests(unittest.TestCase):
"lazier.core.processing.render_text_in_portuguese", return_value="Olá mundo"
), patch(
"lazier.core.processing.summarize_text", return_value="Resumo do áudio."
- ), patch(
+ ) as mock_summary, patch(
"lazier.core.processing.detect_content_type",
return_value={"content_type": "podcast", "confidence": 0.9, "rationale": ""},
- ):
+ ) as mock_detect:
result = process_source(
str(audio_path),
mode="transcribe",
@@ -53,18 +53,52 @@ class ProcessingTests(unittest.TestCase):
output_root=self.temp_dir,
)
+ mock_summary.assert_not_called()
+ mock_detect.assert_not_called()
+ self.assertEqual(result["transcription"], "Olá mundo")
+ self.assertIsNone(result["summary"])
+ self.assertIsNone(result["content_type"])
+ self.assertIsNotNone(result["transcription_path"])
+ self.assertIsNone(result["summary_path"])
+ self.assertEqual(result["result_path"], result["transcription_path"])
+ self.assertTrue(Path(result["transcription_path"]).exists())
+
+ def test_audio_process_generates_both_artifacts(self):
+ audio_path = self.temp_dir / "both.mp3"
+ audio_path.write_bytes(b"fake-audio")
+
+ with patch("lazier.core.processing.transcribe_audio", return_value="Hello world"), patch(
+ "lazier.core.processing.maybe_enrich_transcript_with_diarization",
+ lambda _p, raw, segs, _md: (raw, segs),
+ ), patch(
+ "lazier.core.processing.polish_pt_br_text", side_effect=lambda text, **kwargs: text
+ ), patch(
+ "lazier.core.processing.render_text_in_portuguese", return_value="Olá mundo"
+ ), patch(
+ "lazier.core.processing.summarize_text", return_value="Resumo do áudio."
+ ), patch(
+ "lazier.core.processing.detect_content_type",
+ return_value={"content_type": "podcast", "confidence": 0.9, "rationale": ""},
+ ):
+ result = process_source(
+ str(audio_path),
+ mode="process",
+ output_format="txt",
+ use_smart_summary=False,
+ use_chapters=False,
+ run_id="job-both",
+ source_name="both.mp3",
+ output_root=self.temp_dir,
+ )
+
self.assertEqual(result["transcription"], "Olá mundo")
self.assertEqual(result["summary"], "Resumo do áudio.")
- self.assertEqual(result["content_type"], "podcast")
self.assertIsNotNone(result["transcription_path"])
self.assertIsNotNone(result["summary_path"])
- self.assertEqual(Path(result["transcription_path"]).name, "sample - transcricao.txt")
- self.assertEqual(Path(result["summary_path"]).name, "sample - sumario.txt")
- self.assertEqual(result["result_path"], result["summary_path"])
self.assertTrue(Path(result["transcription_path"]).exists())
self.assertTrue(Path(result["summary_path"]).exists())
- def test_text_summarize_generates_summary_file_legacy(self):
+ def test_text_summarize_generates_summary_only(self):
text_path = self.temp_dir / "article.txt"
text_path.write_text("This is a long article in English.", encoding="utf-8")
@@ -76,9 +110,6 @@ class ProcessingTests(unittest.TestCase):
), patch(
"lazier.core.processing.summarize_text",
return_value="Resumo em português.",
- ), patch(
- "lazier.core.processing.detect_content_type",
- return_value={"content_type": "tech_doc", "confidence": 0.7, "rationale": ""},
):
result = process_source(
str(text_path),
@@ -92,15 +123,12 @@ class ProcessingTests(unittest.TestCase):
)
self.assertEqual(result["summary"], "Resumo em português.")
- self.assertEqual(result["transcription"], "Este é um artigo longo em português.")
- self.assertEqual(result["content_type"], "tech_doc")
+ self.assertIsNone(result["transcription"])
+ self.assertEqual(result["content_type"], "other")
self.assertIsNone(result["smart_summary"])
- self.assertIsNotNone(result["transcription_path"])
+ self.assertIsNone(result["transcription_path"])
self.assertIsNotNone(result["summary_path"])
- self.assertEqual(Path(result["summary_path"]).name, "article - sumario.txt")
- self.assertEqual(Path(result["transcription_path"]).name, "article - transcricao.txt")
self.assertEqual(result["result_path"], result["summary_path"])
- self.assertTrue(Path(result["transcription_path"]).exists())
self.assertTrue(Path(result["summary_path"]).exists())
def test_text_summarize_uses_smart_summary_when_enabled(self):
@@ -123,9 +151,6 @@ class ProcessingTests(unittest.TestCase):
"lazier.core.processing.render_text_in_portuguese",
return_value="Texto convertido para portugues",
), patch(
- "lazier.core.processing.detect_content_type",
- return_value={"content_type": "lecture", "confidence": 0.85, "rationale": ""},
- ), patch(
"lazier.core.processing.summarize_smart",
return_value=smart_payload,
) as mock_smart, patch(
@@ -145,11 +170,10 @@ class ProcessingTests(unittest.TestCase):
mock_smart.assert_called_once()
mock_legacy.assert_not_called()
self.assertEqual(result["smart_summary"], smart_payload)
- self.assertEqual(result["content_type"], "lecture")
+ self.assertEqual(result["content_type"], "other")
self.assertIn("Resumo curto", result["summary"])
- self.assertIsNotNone(result["transcription_path"])
+ self.assertIsNone(result["transcription_path"])
self.assertIsNotNone(result["summary_path"])
- self.assertEqual(Path(result["summary_path"]).name, "smart - sumario.md")
self.assertEqual(result["result_path"], result["summary_path"])
diff --git a/tests/test_smart_summary.py b/tests/test_smart_summary.py
@@ -19,6 +19,7 @@ def _fake_summary_config():
cfg.summary_chunk_overlap_chars = 10
cfg.chat_model = "gpt-5-mini"
cfg.reasoning_effort = "medium"
+ cfg.summary_parallel_workers = 1
cfg.supports_reasoning = MagicMock(return_value=True)
return cfg