lazier

personal summarizer
Log | Files | Refs | README

commit 736c1c14cfd95495e00abc5a69f2bc31dca5ea09
parent 7ccb4419e8a6a7500130626063ca93810c95223f
Author: Pablo Murad <pablo@pablomurad.com>
Date:   Tue, 31 Mar 2026 23:20:18 -0300

fix docker

Diffstat:
D .agent/skills | 1 -
M lazier/api/main.py | 19 ++++++++++++++++++-
M lazier/api/routes.py | 1399 +++++++++++++++++++------------------------------------------------------------
M lazier/cli.py | 507 +++++++++++++++++++++++--------------------------------------------------------
M lazier/core/auth.py | 6 +++++-
M lazier/core/cache.py | 21 ++++++++++++++++++---
M lazier/core/formats.py | 77 ++++++++++++++++++++++++++++++++++++++++++-----------------------------------
A lazier/core/jobs.py | 274 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A lazier/core/processing.py | 400 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M lazier/docx_generator.py | 37 +++++++++++++++++++++----------------
M lazier/downloader.py | 13 ++++++++++++-
M lazier/summarizer.py | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
M lazier/transcriber.py | 47 ++++++++++++++++++++++++++++++-----------------
M lazier/web/extractor.py | 42 ++++++++++++++++++++++++++++++------------
M lazier/web/templates/index.html | 1889 +++++++++++++------------------------------------------------------------------
M lazier/web/templates/login.html | 302 +++++++++++++++----------------------------------------------------------------
M tests/__init__.py | 3 +--
A tests/test_api.py | 120 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A tests/test_jobs.py | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A tests/test_processing.py | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
20 files changed, 2015 insertions(+), 3351 deletions(-)

diff --git a/.agent/skills b/.agent/skills @@ -1 +0,0 @@ -Subproject commit 44d6277b698618538f3ae68fa8c803d7e5f07482 diff --git a/lazier/api/main.py b/lazier/api/main.py @@ -8,7 +8,11 @@ from fastapi import FastAPI from fastapi.staticfiles import StaticFiles from fastapi.middleware.cors import CORSMiddleware from starlette.middleware.sessions import SessionMiddleware -from dotenv import load_dotenv +try: + from dotenv import load_dotenv +except ImportError: # pragma: no cover - ambiente sem python-dotenv + def load_dotenv(): + return False from .routes import router from .websocket import websocket_router @@ -76,6 +80,19 @@ def create_app() -> FastAPI: except Exception as e: print(f"Aviso: Cache Redis não disponível: {e}") app.state.cache = None + + # Inicializa persistencia de jobs + try: + from ..core.jobs import get_job_store + + job_store = get_job_store() + recovered = job_store.mark_incomplete_as_interrupted() + if recovered: + print(f"Aviso: {recovered} job(s) marcados como interrompidos após reinicialização") + app.state.job_store = job_store + except Exception as e: + print(f"ERRO: Falha ao inicializar persistência de jobs: {e}") + raise # Rotas de autenticação (públicas) app.include_router(auth_router) diff --git a/lazier/api/routes.py b/lazier/api/routes.py @@ -1,273 +1,220 @@ """ -Rotas da API FastAPI +Rotas da API FastAPI. 
""" +import logging import os import uuid -import asyncio -import logging +from datetime import datetime from pathlib import Path from typing import List, Optional -from fastapi import APIRouter, UploadFile, File, HTTPException, BackgroundTasks, Form -from fastapi.responses import FileResponse, JSONResponse + +from fastapi import APIRouter, BackgroundTasks, File, Form, HTTPException, UploadFile +from fastapi.responses import FileResponse from pydantic import BaseModel -from datetime import datetime -from ..core.cache import get_cache_manager, calculate_file_hash, calculate_url_hash -from ..core.batch import get_batch_processor +from ..audio_processor import extract_audio_from_video from ..core.formats import export -from ..core.exceptions import ( - YouTubeDownloadError, - YouTubeVideoUnavailableError, - YouTubeAccessDeniedError, - MusicContentError, -) +from ..core.jobs import build_job_artifact_path, get_job_store +from ..core.processing import process_source from ..core.supported_sites import SUPPORTED_VIDEO_SITES -from ..utils import validate_input, get_output_filename, is_youtube_url, get_lazier_filename -from ..downloader import download_youtube_audio, download_video_audio -from ..audio_processor import prepare_audio_file, extract_audio_from_video -from ..transcriber import transcribe_audio -from ..summarizer import summarize_text, summarize_text_file, summarize_web_page, summarize_pdf -from ..web.extractor import extract_web_content, extract_text_file_content, extract_pdf_content +from ..core.playlist import is_playlist_url +from ..utils import AUDIO_EXTENSIONS, TEXT_EXTENSIONS, VIDEO_EXTENSIONS from .websocket import broadcast_progress -# Configurar logging logger = logging.getLogger(__name__) router = APIRouter() -# Diretórios -UPLOAD_DIR = Path(os.getenv('LAZIER_UPLOAD_DIR', '/app/uploads')) -OUTPUT_DIR = Path(os.getenv('LAZIER_OUTPUT_DIR', '/app/outputs')) +UPLOAD_DIR = Path(os.getenv("LAZIER_UPLOAD_DIR", "/app/uploads")) UPLOAD_DIR.mkdir(parents=True, 
exist_ok=True) -OUTPUT_DIR.mkdir(parents=True, exist_ok=True) - -# Jobs em memória (em produção, usar Redis ou banco de dados) -jobs: dict = {} class ProcessRequest(BaseModel): - """Request para processar URL""" + """Request para processar URL.""" + url: str format: str = "docx" - transcribe: bool = True - summarize: bool = True + mode: Optional[str] = None + transcribe: Optional[bool] = None + summarize: Optional[bool] = None + + +def _resolve_mode( + mode: Optional[str], + transcribe: Optional[bool], + summarize: Optional[bool], +) -> str: + if mode: + if mode not in {"transcribe", "summarize"}: + raise HTTPException(status_code=400, detail="Modo invalido. Use 'transcribe' ou 'summarize'.") + return mode + + if transcribe is None and summarize is None: + raise HTTPException(status_code=400, detail="Informe `mode`.") + + if transcribe and not summarize: + return "transcribe" + if summarize and not transcribe: + return "summarize" + + raise HTTPException( + status_code=400, + detail="A combinacao legada de transcribe/summarize nao aceita mais os dois modos ao mesmo tempo.", + ) -class JobStatus(BaseModel): - """Status de um job""" - id: str - status: str - progress: int - result_path: Optional[str] = None - error: Optional[str] = None +def _job_title(job: dict) -> str: + metadata = job.get("metadata", {}) + if metadata.get("title"): + return metadata["title"] + if job.get("source_name"): + return job["source_name"] + if job.get("source_url"): + return job["source_url"] + if job.get("file_path"): + return Path(job["file_path"]).stem + return f"Job {job['id']}" -def process_file_async( - file_path: str, - job_id: str, - output_format: str = "docx", - should_transcribe: bool = True, - should_summarize: bool = True -): - """Processa arquivo de forma assíncrona""" +def _progress_updater(job_id: str): + store = get_job_store() + + def callback(progress: int, status: str, message: Optional[str] = None) -> None: + updates = {"progress": progress, "status": status} + if 
status == "failed": + updates["error"] = message + store.update_job(job_id, **updates) + broadcast_progress(job_id, progress, status, message) + + return callback + + +def _create_job( + *, + mode: str, + output_format: str, + source_name: Optional[str] = None, + source_url: Optional[str] = None, + file_path: Optional[str] = None, + input_type: Optional[str] = None, +) -> dict: + store = get_job_store() + job_id = str(uuid.uuid4()) + created_at = datetime.now().isoformat() + return store.create_job( + { + "id": job_id, + "mode": mode, + "status": "pending", + "progress": 0, + "input_type": input_type, + "source_name": source_name, + "source_url": source_url, + "file_path": file_path, + "format": output_format, + "metadata": {}, + "created_at": created_at, + } + ) + + +def _process_job(job_id: str) -> None: + store = get_job_store() + job = store.get_job(job_id) + if not job: + logger.error("Job %s nao encontrado para processamento", job_id) + return + + source = job.get("source_url") or job.get("file_path") + progress_callback = _progress_updater(job_id) + try: - # Inicializa campos do job se não existirem - if 'transcription' not in jobs[job_id]: - jobs[job_id]['transcription'] = None - jobs[job_id]['summary'] = None - jobs[job_id]['transcription_path'] = None - jobs[job_id]['summary_path'] = None - jobs[job_id]['metadata'] = {} - - jobs[job_id]['status'] = 'processing' - jobs[job_id]['progress'] = 10 - broadcast_progress(job_id, 10, 'processing', 'Iniciando processamento...') - - # Valida input - is_valid, input_type, error_msg = validate_input(file_path) - if not is_valid: - raise Exception(error_msg) - - jobs[job_id]['progress'] = 20 - broadcast_progress(job_id, 20, 'processing', 'Arquivo validado') - - # Determina tipo de processamento - cache = get_cache_manager() - file_hash = calculate_file_hash(file_path) - - transcription = None - summary = None - metadata = {} - transcription_path = None - summary_path = None - transcription_internal = None # Para uso 
interno quando apenas sumarizar - - if input_type in ['audio', 'video']: - # Processa áudio/vídeo - # Nota: Vídeos já foram convertidos para áudio no upload - # Para sumarizar áudio/vídeo, sempre precisa transcrever primeiro - needs_transcription = should_transcribe or should_summarize - - if needs_transcription: - jobs[job_id]['progress'] = 30 - broadcast_progress(job_id, 30, 'processing', 'Preparando áudio...') - - # Verifica cache - cached = cache.get('transcription', file_hash) if cache else None - if cached: - transcription_internal = cached.get('transcription') - metadata = cached.get('metadata', {}) - broadcast_progress(job_id, 50, 'processing', 'Transcrição encontrada no cache') - else: - # Prepara áudio (vídeos já vêm como áudio, então is_video=False) - # Mantém fallback para vídeo caso algum chegue aqui (não deveria) - audio_file = prepare_audio_file(file_path, is_video=(input_type == 'video')) - jobs[job_id]['progress'] = 50 - broadcast_progress(job_id, 50, 'processing', 'Transcrevendo áudio...') - - # Transcreve - transcription_internal = transcribe_audio(audio_file, language='pt', model='whisper-1') - jobs[job_id]['progress'] = 70 - broadcast_progress(job_id, 70, 'processing', 'Transcrição concluída') - - # Salva no cache - if cache: - cache.set('transcription', file_hash, { - 'transcription': transcription_internal, - 'metadata': metadata, - 'timestamp': datetime.now().isoformat(), - }) - - # Se usuário pediu transcrição, armazena no job - if should_transcribe: - transcription = transcription_internal - - # Gera arquivo só com transcrição se apenas transcrever - if transcription and should_transcribe and not should_summarize: - transcription_path = Path(get_lazier_filename(OUTPUT_DIR, output_format, "_transcription")) - export( - transcription=transcription, - summary=None, - metadata=metadata, - output_path=str(transcription_path), - format_type=output_format - ) - - # Sumariza se solicitado - if should_summarize: - jobs[job_id]['progress'] = 80 - 
broadcast_progress(job_id, 80, 'processing', 'Gerando sumário...') - if transcription_internal: - text_hash = calculate_url_hash(transcription_internal) - cached_summary = cache.get('summary', text_hash) if cache else None - - if cached_summary: - summary = cached_summary.get('summary') - broadcast_progress(job_id, 85, 'processing', 'Sumário encontrado no cache') - else: - summary = summarize_text(transcription_internal, model='gpt-4o-mini', language='pt-BR') - if cache: - cache.set('summary', text_hash, { - 'summary': summary, - 'timestamp': datetime.now().isoformat(), - }) - jobs[job_id]['progress'] = 90 - broadcast_progress(job_id, 90, 'processing', 'Sumário concluído') - - # Gera arquivo só com sumário se apenas sumarizar - if summary and not should_transcribe: - summary_path = Path(get_lazier_filename(OUTPUT_DIR, output_format, "_summary")) - export( - transcription="", - summary=summary, - metadata=metadata, - output_path=str(summary_path), - format_type=output_format - ) - - elif input_type == 'text' or Path(file_path).suffix.lower() == '.pdf': - # Processa texto/PDF - jobs[job_id]['progress'] = 40 - broadcast_progress(job_id, 40, 'processing', 'Extraindo conteúdo do arquivo...') - - # Para textos/PDFs, extrai conteúdo (equivalente a transcrição) - # Mas só armazena se usuário pediu transcrição - if Path(file_path).suffix.lower() == '.pdf': - content_data = extract_pdf_content(file_path) - content_extracted = content_data['content'] - if should_summarize: - broadcast_progress(job_id, 60, 'processing', 'Sumarizando PDF...') - summary = summarize_pdf(file_path) - else: - content_data = extract_text_file_content(file_path) - content_extracted = content_data['content'] - if should_summarize: - broadcast_progress(job_id, 60, 'processing', 'Sumarizando texto...') - summary = summarize_text_file(file_path) - - # Só armazena transcription se usuário pediu - if should_transcribe: - transcription = content_extracted - - metadata = {'title': content_data.get('title', 
'Documento'), 'file_path': file_path} - - # Gera arquivo só com sumário se apenas sumarizar - if should_summarize and summary and not should_transcribe: - summary_path = Path(get_lazier_filename(OUTPUT_DIR, output_format, "_summary")) - export( - transcription="", - summary=summary, - metadata=metadata, - output_path=str(summary_path), - format_type=output_format - ) - - jobs[job_id]['progress'] = 90 - broadcast_progress(job_id, 90, 'processing', 'Conteúdo processado') - - # Gera arquivo consolidado apenas se ambos foram solicitados OU se apenas um foi solicitado mas não tem arquivo separado - # Não gera consolidado quando apenas sumarizar (já tem arquivo separado) - should_generate_consolidated = False - if should_transcribe and should_summarize: - # Ambos solicitados: sempre gera consolidado - should_generate_consolidated = True - elif should_transcribe and not should_summarize: - # Apenas transcrever: gera consolidado se não tem arquivo separado - should_generate_consolidated = not transcription_path - elif should_summarize and not should_transcribe: - # Apenas sumarizar: NÃO gera consolidado (já tem arquivo separado) - should_generate_consolidated = False - - if should_generate_consolidated: - broadcast_progress(job_id, 95, 'processing', 'Gerando arquivo de saída...') - output_path = Path(get_lazier_filename(OUTPUT_DIR, output_format)) - - export( - transcription=transcription or "", - summary=summary if should_summarize else None, - metadata=metadata, - output_path=str(output_path), - format_type=output_format - ) - - jobs[job_id]['result_path'] = str(output_path) - - # Armazena dados separados - só armazena transcription se foi solicitado - jobs[job_id]['transcription'] = transcription if should_transcribe else None - jobs[job_id]['summary'] = summary - jobs[job_id]['transcription_path'] = str(transcription_path) if transcription_path else None - jobs[job_id]['summary_path'] = str(summary_path) if summary_path else None - jobs[job_id]['metadata'] = metadata - 
- jobs[job_id]['status'] = 'completed' - jobs[job_id]['progress'] = 100 - broadcast_progress(job_id, 100, 'completed', 'Processamento concluído') - - except Exception as e: - jobs[job_id]['status'] = 'failed' - jobs[job_id]['error'] = str(e) - broadcast_progress(job_id, 0, 'failed', f'Erro: {str(e)}') + store.update_job(job_id, status="processing", progress=1, error=None) + broadcast_progress(job_id, 1, "processing", "Iniciando processamento...") + + result = process_source( + source, + mode=job["mode"], + output_format=job["format"], + run_id=job_id, + source_name=job.get("source_name"), + created_at=job.get("created_at"), + progress_callback=progress_callback, + ) + store.update_job( + job_id, + status="completed", + progress=100, + input_type=result.get("input_type", job.get("input_type")), + metadata=result.get("metadata", {}), + transcription=result.get("transcription"), + summary=result.get("summary"), + result_path=result.get("result_path"), + transcription_path=result.get("transcription_path"), + summary_path=result.get("summary_path"), + source_name=result.get("source_name") or job.get("source_name"), + error=None, + ) + broadcast_progress(job_id, 100, "completed", "Processamento concluido") + except Exception as exc: + logger.exception("Erro ao processar job %s", job_id) + store.update_job(job_id, status="failed", progress=0, error=str(exc)) + broadcast_progress(job_id, 0, "failed", str(exc)) + + +def _ensure_download_file(job: dict, artifact_kind: str) -> Optional[str]: + existing_path_key = { + "transcription": "transcription_path", + "summary": "summary_path", + "result": "result_path", + }[artifact_kind] + existing_path = job.get(existing_path_key) + if existing_path and Path(existing_path).exists(): + return existing_path + + if artifact_kind == "transcription" and job.get("transcription"): + output_path = build_job_artifact_path( + job_id=job["id"], + source_name=job.get("source_name"), + format_type=job.get("format", "docx"), + 
artifact_kind="transcription", + created_at=job.get("created_at"), + ) + export( + transcription=job["transcription"], + summary=None, + metadata=job.get("metadata", {}), + output_path=str(output_path), + format_type=job.get("format", "docx"), + ) + get_job_store().update_job(job["id"], transcription_path=str(output_path)) + return str(output_path) + + if artifact_kind == "summary" and job.get("summary"): + output_path = build_job_artifact_path( + job_id=job["id"], + source_name=job.get("source_name"), + format_type=job.get("format", "docx"), + artifact_kind="summary", + created_at=job.get("created_at"), + ) + export( + transcription="", + summary=job["summary"], + metadata=job.get("metadata", {}), + output_path=str(output_path), + format_type=job.get("format", "docx"), + ) + get_job_store().update_job(job["id"], summary_path=str(output_path)) + return str(output_path) + + if artifact_kind == "result": + mode = job.get("mode") + preferred_key = "transcription" if mode == "transcribe" else "summary" + return _ensure_download_file(job, preferred_key) + + return None @router.post("/upload") @@ -275,875 +222,197 @@ async def upload_files( background_tasks: BackgroundTasks, files: List[UploadFile] = File(...), format: str = Form("docx"), - transcribe: bool = Form(True), - summarize: bool = Form(True) + mode: Optional[str] = Form(None), + transcribe: Optional[bool] = Form(None), + summarize: Optional[bool] = Form(None), ): - """Upload de arquivos para processamento""" - from ..utils import VIDEO_EXTENSIONS - - job_ids = [] - + """Upload de arquivos para processamento.""" + + resolved_mode = _resolve_mode(mode, transcribe, summarize) + valid_extensions = AUDIO_EXTENSIONS | VIDEO_EXTENSIONS | TEXT_EXTENSIONS | {".pdf"} + jobs = [] + for file in files: - # Valida tipo de arquivo ext = Path(file.filename).suffix.lower() - valid_extensions = {'.mp3', '.wav', '.m4a', '.mp4', '.avi', '.mkv', '.pdf', '.txt', '.md', '.html'} if ext not in valid_extensions: - raise 
HTTPException(status_code=400, detail=f"Tipo de arquivo não suportado: {ext}") - - # Salva arquivo + raise HTTPException(status_code=400, detail=f"Tipo de arquivo nao suportado: {ext}") + file_path = UPLOAD_DIR / f"{uuid.uuid4()}_{file.filename}" try: - with open(file_path, 'wb') as f: - content = await file.read() - f.write(content) - except Exception as e: - logger.error(f"Erro ao salvar arquivo {file.filename}: {e}") - raise HTTPException(status_code=500, detail=f"Erro ao salvar arquivo: {str(e)}") - - # Se for vídeo, extrair áudio imediatamente (sincrono para o upload mas rápido) + with open(file_path, "wb") as handle: + handle.write(await file.read()) + except Exception as exc: + logger.error("Erro ao salvar arquivo %s: %s", file.filename, exc) + raise HTTPException(status_code=500, detail=f"Erro ao salvar arquivo: {exc}") from exc + if ext in VIDEO_EXTENSIONS: try: - # Extrai áudio do vídeo - audio_file = extract_audio_from_video(str(file_path)) - # Deleta vídeo original - file_path.unlink() - # Usa áudio como arquivo principal - file_path = Path(audio_file) - except Exception as e: - # Se falhar, mantém vídeo original e reporta erro - logger.error(f"Erro ao extrair áudio do vídeo {file.filename}: {e}") - raise HTTPException( - status_code=500, - detail=f"Erro ao extrair áudio do vídeo: {str(e)}" - ) - - # Cria job - job_id = str(uuid.uuid4()) - jobs[job_id] = { - 'id': job_id, - 'status': 'pending', - 'progress': 0, - 'file_path': str(file_path), - 'format': format, - 'transcribe': transcribe, - 'summarize': summarize, - 'transcription': None, - 'summary': None, - 'transcription_path': None, - 'summary_path': None, - 'metadata': {}, - 'created_at': datetime.now().isoformat(), - } - - # Processa em background REAL (usando BackgroundTasks) - background_tasks.add_task(process_file_async, str(file_path), job_id, format, transcribe, summarize) - - job_ids.append(job_id) - - return {"job_ids": job_ids, "message": f"{len(job_ids)} arquivo(s) enviado(s)"} + 
audio_path = extract_audio_from_video(str(file_path)) + file_path.unlink(missing_ok=True) + file_path = Path(audio_path) + except Exception as exc: + logger.error("Erro ao extrair audio do video %s: %s", file.filename, exc) + raise HTTPException(status_code=500, detail=f"Erro ao extrair audio do video: {exc}") from exc + + job = _create_job( + mode=resolved_mode, + output_format=format, + source_name=file.filename, + file_path=str(file_path), + input_type="file", + ) + background_tasks.add_task(_process_job, job["id"]) + jobs.append(job["id"]) + + return {"job_ids": jobs, "message": f"{len(jobs)} arquivo(s) enviado(s)"} @router.post("/process") async def process_url(request: ProcessRequest, background_tasks: BackgroundTasks): - """Processa URL (YouTube ou página web)""" - job_id = str(uuid.uuid4()) - - jobs[job_id] = { - 'id': job_id, - 'status': 'pending', - 'progress': 0, - 'url': request.url, - 'format': request.format, - 'transcribe': request.transcribe, - 'summarize': request.summarize, - 'transcription': None, - 'summary': None, - 'transcription_path': None, - 'summary_path': None, - 'metadata': {}, - 'created_at': datetime.now().isoformat(), - } - - # Processa em background - if is_youtube_url(request.url): - background_tasks.add_task(process_youtube_async, request.url, job_id, request.format, request.transcribe, request.summarize) - else: - # Tenta processar como vídeo de qualquer site; se falhar, fallback para página web - background_tasks.add_task(process_video_url_async, request.url, job_id, request.format, request.transcribe, request.summarize) - - return {"job_id": job_id, "status": "processing"} - - -def process_youtube_async(url: str, job_id: str, output_format: str, should_transcribe: bool, should_summarize: bool): - """Processa vídeo do YouTube""" - try: - # Inicializa campos do job se não existirem - if 'transcription' not in jobs[job_id]: - jobs[job_id]['transcription'] = None - jobs[job_id]['summary'] = None - jobs[job_id]['transcription_path'] 
= None - jobs[job_id]['summary_path'] = None - jobs[job_id]['metadata'] = {} - - jobs[job_id]['status'] = 'processing' - jobs[job_id]['progress'] = 10 - broadcast_progress(job_id, 10, 'processing', 'Iniciando processamento do YouTube...') - - cache = get_cache_manager() - url_hash = calculate_url_hash(url) - - transcription = None - summary = None - transcription_path = None - summary_path = None - transcription_internal = None # Para uso interno quando apenas sumarizar - - # Para YouTube, sempre precisa transcrever para sumarizar - needs_transcription = should_transcribe or should_summarize - - # Verifica cache - cached = cache.get('youtube', url_hash) if cache else None - if cached: - transcription_internal = cached.get('transcription') - summary = cached.get('summary') - metadata = cached.get('metadata', {}) - # Se usuário pediu transcrição, armazena - if should_transcribe: - transcription = transcription_internal - if transcription_internal and (not should_summarize or summary): - jobs[job_id]['progress'] = 100 - broadcast_progress(job_id, 100, 'completed', 'Dados encontrados no cache') - else: - metadata = {} - if needs_transcription: - # Download - broadcast_progress(job_id, 20, 'processing', 'Baixando vídeo do YouTube...') - try: - audio_file, metadata = download_youtube_audio(url, str(UPLOAD_DIR)) - except MusicContentError as e: - logger.error(f"Conteúdo detectado como música (job {job_id}): {str(e)}") - user_message = "Conteúdo detectado como música não é processado pelo Lazier." - jobs[job_id]['status'] = 'failed' - jobs[job_id]['error'] = user_message - broadcast_progress(job_id, 0, 'failed', user_message) - return - except YouTubeVideoUnavailableError as e: - logger.error(f"Vídeo não disponível (job {job_id}): {str(e)}") - user_message = ( - "Erro: O vídeo não está disponível. " - "Pode ser que o vídeo seja privado, tenha sido removido ou esteja bloqueado por região." 
- ) - if e.error_code: - user_message += f" (Código: {e.error_code})" - jobs[job_id]['status'] = 'failed' - jobs[job_id]['error'] = user_message - broadcast_progress(job_id, 0, 'failed', user_message) - return - except YouTubeAccessDeniedError as e: - logger.error(f"Acesso negado ao vídeo (job {job_id}): {str(e)}") - user_message = ( - "Erro ao baixar vídeo do YouTube: O YouTube está bloqueando o download. " - "Isso pode ser temporário. Tente novamente em alguns minutos ou verifique se o vídeo está disponível." - ) - if e.error_code: - user_message += f" (Código: {e.error_code})" - jobs[job_id]['status'] = 'failed' - jobs[job_id]['error'] = user_message - broadcast_progress(job_id, 0, 'failed', user_message) - return - except YouTubeDownloadError as e: - logger.error(f"Erro ao baixar vídeo do YouTube (job {job_id}): {str(e)}") - user_message = f"Erro ao baixar vídeo do YouTube: {str(e)}" - if e.error_code: - user_message += f" (Código: {e.error_code})" - jobs[job_id]['status'] = 'failed' - jobs[job_id]['error'] = user_message - broadcast_progress(job_id, 0, 'failed', user_message) - return - except Exception as e: - logger.error(f"Erro inesperado ao baixar vídeo (job {job_id}): {str(e)}") - user_message = f"Erro inesperado ao baixar vídeo: {str(e)}" - jobs[job_id]['status'] = 'failed' - jobs[job_id]['error'] = user_message - broadcast_progress(job_id, 0, 'failed', user_message) - return - - jobs[job_id]['progress'] = 30 - broadcast_progress(job_id, 30, 'processing', 'Transcrevendo áudio...') - - # Transcreve - try: - transcription_internal = transcribe_audio(audio_file, language='pt', model='whisper-1') - except Exception as e: - logger.error(f"Erro ao transcrever áudio (job {job_id}): {str(e)}") - jobs[job_id]['status'] = 'failed' - jobs[job_id]['error'] = f"Erro ao transcrever áudio: {str(e)}" - broadcast_progress(job_id, 0, 'failed', f"Erro ao transcrever áudio: {str(e)}") - return - - jobs[job_id]['progress'] = 60 - broadcast_progress(job_id, 60, 'processing', 
'Transcrição concluída') - - # Se usuário pediu transcrição, armazena no job - if should_transcribe: - transcription = transcription_internal - - # Gera arquivo só com transcrição se apenas transcrever - if transcription and should_transcribe and not should_summarize: - transcription_path = Path(get_lazier_filename(OUTPUT_DIR, output_format, "_transcription")) - export( - transcription=transcription, - summary=None, - metadata=metadata, - output_path=str(transcription_path), - format_type=output_format - ) - - # Sumariza se solicitado - if should_summarize: - if transcription_internal: - broadcast_progress(job_id, 70, 'processing', 'Gerando sumário...') - summary = summarize_text(transcription_internal, model='gpt-4o-mini', language='pt-BR') - - jobs[job_id]['progress'] = 80 - broadcast_progress(job_id, 80, 'processing', 'Sumário concluído') - - # Gera arquivo só com sumário se apenas sumarizar - if summary and not should_transcribe: - summary_path = Path(get_lazier_filename(OUTPUT_DIR, output_format, "_summary")) - export( - transcription="", - summary=summary, - metadata=metadata, - output_path=str(summary_path), - format_type=output_format - ) - - # Salva cache - if cache and transcription_internal: - cache.set('youtube', url_hash, { - 'transcription': transcription_internal, - 'summary': summary, - 'metadata': metadata, - 'timestamp': datetime.now().isoformat(), - }) - - # Garantir link do vídeo no cabeçalho do documento - if metadata is None: - metadata = {} - if not metadata.get('webpage_url'): - metadata = {**metadata, 'webpage_url': url} - - # Gera arquivo consolidado apenas se ambos foram solicitados OU se apenas transcrever (sem arquivo separado) - should_generate_consolidated = False - if should_transcribe and should_summarize: - # Ambos solicitados: sempre gera consolidado - should_generate_consolidated = True - elif should_transcribe and not should_summarize: - # Apenas transcrever: gera consolidado se não tem arquivo separado - 
should_generate_consolidated = not transcription_path - elif should_summarize and not should_transcribe: - # Apenas sumarizar: NÃO gera consolidado (já tem arquivo separado) - should_generate_consolidated = False - - if should_generate_consolidated: - broadcast_progress(job_id, 90, 'processing', 'Gerando arquivo de saída...') - output_path = Path(get_lazier_filename(OUTPUT_DIR, output_format)) - - export( - transcription=transcription or "", - summary=summary if should_summarize else None, - metadata=metadata, - output_path=str(output_path), - format_type=output_format - ) - - jobs[job_id]['result_path'] = str(output_path) - - # Armazena dados separados - só armazena transcription se foi solicitado - jobs[job_id]['transcription'] = transcription if should_transcribe else None - jobs[job_id]['summary'] = summary - jobs[job_id]['transcription_path'] = str(transcription_path) if transcription_path else None - jobs[job_id]['summary_path'] = str(summary_path) if summary_path else None - jobs[job_id]['metadata'] = metadata - - jobs[job_id]['status'] = 'completed' - jobs[job_id]['progress'] = 100 - broadcast_progress(job_id, 100, 'completed', 'Processamento concluído') - - except (YouTubeVideoUnavailableError, YouTubeAccessDeniedError, YouTubeDownloadError) as e: - # Erros de download já foram tratados acima, mas pode haver outros casos - error_msg = str(e) - logger.error(f"Erro no processamento do YouTube (job {job_id}): {error_msg}") - - # Se o erro já foi tratado acima (download/transcrição), não sobrescreve - if jobs[job_id].get('status') != 'failed': - user_message = f"Erro no processamento: {error_msg}" - if hasattr(e, 'error_code') and e.error_code: - user_message += f" (Código: {e.error_code})" - jobs[job_id]['status'] = 'failed' - jobs[job_id]['error'] = user_message - broadcast_progress(job_id, 0, 'failed', user_message) - except Exception as e: - error_msg = str(e) - logger.error(f"Erro inesperado no processamento do YouTube (job {job_id}): {error_msg}") - - # 
Se o erro já foi tratado acima (download/transcrição), não sobrescreve - if jobs[job_id].get('status') != 'failed': - jobs[job_id]['status'] = 'failed' - jobs[job_id]['error'] = error_msg - broadcast_progress(job_id, 0, 'failed', f'Erro: {error_msg}') - - -def process_video_url_async(url: str, job_id: str, output_format: str, should_transcribe: bool, should_summarize: bool): - """Tenta processar URL como vídeo (TED, Reddit, Vimeo, etc.); se falhar, fallback para página web.""" - try: - broadcast_progress(job_id, 10, 'processing', 'Tentando extrair vídeo/áudio da URL...') - try: - audio_file, metadata = download_video_audio(url, str(UPLOAD_DIR)) - except MusicContentError as e: - logger.error(f"Conteúdo detectado como música (job {job_id}): {str(e)}") - user_message = "Conteúdo detectado como música não é processado pelo Lazier." - jobs[job_id]['status'] = 'failed' - jobs[job_id]['error'] = user_message - broadcast_progress(job_id, 0, 'failed', user_message) - return - except Exception as e: - logger.info(f"URL não é vídeo ou falha ao baixar (job {job_id}), fallback para página web: {e}") - process_web_async(url, job_id, output_format, should_transcribe, should_summarize) - return - _run_video_pipeline( - job_id=job_id, - audio_file=audio_file, - metadata=metadata, - output_format=output_format, - should_transcribe=should_transcribe, - should_summarize=should_summarize, - cache_prefix='video', - url_hash=calculate_url_hash(url), - url=url, - ) - except Exception as e: - if jobs[job_id].get('status') != 'failed': - jobs[job_id]['status'] = 'failed' - jobs[job_id]['error'] = str(e) - broadcast_progress(job_id, 0, 'failed', str(e)) + """Processa URL.""" + resolved_mode = _resolve_mode(request.mode, request.transcribe, request.summarize) + if is_playlist_url(request.url): + raise HTTPException( + status_code=400, + detail="Playlists continuam disponiveis apenas no fluxo explicito da CLI.", + ) -def _run_video_pipeline( - job_id: str, - audio_file: str, - metadata: dict, 
- output_format: str, - should_transcribe: bool, - should_summarize: bool, - cache_prefix: str, - url_hash: str, - url: str = "", -): - """Pipeline comum: transcrever, sumarizar, exportar, cache. Usado por YouTube e vídeo genérico.""" - if 'transcription' not in jobs[job_id]: - jobs[job_id]['transcription'] = None - jobs[job_id]['summary'] = None - jobs[job_id]['transcription_path'] = None - jobs[job_id]['summary_path'] = None - jobs[job_id]['metadata'] = {} - cache = get_cache_manager() - cached = cache.get(cache_prefix, url_hash) if cache else None - transcription = None - summary = None - transcription_path = None - summary_path = None - transcription_internal = None - needs_transcription = should_transcribe or should_summarize - if cached: - transcription_internal = cached.get('transcription') - summary = cached.get('summary') - metadata = cached.get('metadata', metadata) - # Garantir link do vídeo no cabeçalho do documento - if metadata is None: - metadata = {} - if url and not metadata.get('webpage_url'): - metadata = {**metadata, 'webpage_url': url} - if should_transcribe: - transcription = transcription_internal - if transcription_internal and (not should_summarize or summary): - jobs[job_id]['progress'] = 100 - broadcast_progress(job_id, 100, 'completed', 'Dados encontrados no cache') - else: - if needs_transcription: - broadcast_progress(job_id, 30, 'processing', 'Transcrevendo áudio...') - transcription_internal = transcribe_audio(audio_file, language='pt', model='whisper-1') - broadcast_progress(job_id, 60, 'processing', 'Transcrição concluída') - if should_transcribe: - transcription = transcription_internal - if transcription and should_transcribe and not should_summarize: - transcription_path = Path(get_lazier_filename(OUTPUT_DIR, output_format, "_transcription")) - export(transcription=transcription, summary=None, metadata=metadata, output_path=str(transcription_path), format_type=output_format) - if should_summarize and transcription_internal: - 
broadcast_progress(job_id, 70, 'processing', 'Gerando sumário...') - summary = summarize_text(transcription_internal, model='gpt-4o-mini', language='pt-BR') - broadcast_progress(job_id, 80, 'processing', 'Sumário concluído') - if summary and not should_transcribe: - summary_path = Path(get_lazier_filename(OUTPUT_DIR, output_format, "_summary")) - export(transcription="", summary=summary, metadata=metadata, output_path=str(summary_path), format_type=output_format) - if cache and transcription_internal: - cache.set(cache_prefix, url_hash, { - 'transcription': transcription_internal, - 'summary': summary, - 'metadata': metadata, - 'timestamp': datetime.now().isoformat(), - }) - # Garantir link do vídeo no cabeçalho do documento (também quando não veio do cache) - if metadata is None: - metadata = {} - if url and not metadata.get('webpage_url'): - metadata = {**metadata, 'webpage_url': url} - should_generate_consolidated = (should_transcribe and should_summarize) or (should_transcribe and not should_summarize and not transcription_path) - if should_generate_consolidated: - broadcast_progress(job_id, 90, 'processing', 'Gerando arquivo de saída...') - output_path = Path(get_lazier_filename(OUTPUT_DIR, output_format)) - export(transcription=transcription or "", summary=summary if should_summarize else None, metadata=metadata, output_path=str(output_path), format_type=output_format) - jobs[job_id]['result_path'] = str(output_path) - jobs[job_id]['transcription'] = transcription if should_transcribe else None - jobs[job_id]['summary'] = summary - jobs[job_id]['transcription_path'] = str(transcription_path) if transcription_path else None - jobs[job_id]['summary_path'] = str(summary_path) if summary_path else None - jobs[job_id]['metadata'] = metadata - jobs[job_id]['status'] = 'completed' - jobs[job_id]['progress'] = 100 - broadcast_progress(job_id, 100, 'completed', 'Processamento concluído') - - -def process_web_async(url: str, job_id: str, output_format: str, 
should_transcribe: bool, should_summarize: bool): - """Processa página web""" - try: - # Inicializa campos do job se não existirem - if 'transcription' not in jobs[job_id]: - jobs[job_id]['transcription'] = None - jobs[job_id]['summary'] = None - jobs[job_id]['transcription_path'] = None - jobs[job_id]['summary_path'] = None - jobs[job_id]['metadata'] = {} - - jobs[job_id]['status'] = 'processing' - jobs[job_id]['progress'] = 20 - broadcast_progress(job_id, 20, 'processing', 'Iniciando processamento da página web...') - - cache = get_cache_manager() - url_hash = calculate_url_hash(url) - - content = None - summary = None - transcription_path = None - summary_path = None - metadata = {} - - # Verifica cache - cached = cache.get('web', url_hash) if cache else None - needs_processing = True - - if cached: - cached_content = cached.get('content') - cached_summary = cached.get('summary') - cached_metadata = cached.get('metadata', {}) - - # Usa dados do cache se disponíveis e necessários - if should_transcribe and cached_content: - content = cached_content - if not metadata: - metadata = cached_metadata - if should_summarize and cached_summary: - summary = cached_summary - if not metadata: - metadata = cached_metadata - - # Verifica se tem tudo que precisa no cache - has_all_needed = True - if should_transcribe and not content: - has_all_needed = False - if should_summarize and not summary: - has_all_needed = False - - if has_all_needed: - needs_processing = False - jobs[job_id]['progress'] = 100 - broadcast_progress(job_id, 100, 'completed', 'Dados encontrados no cache') - - if needs_processing: - # Se precisa transcrever e não tem no cache, extrai conteúdo completo - if should_transcribe and not content: - broadcast_progress(job_id, 30, 'processing', 'Extraindo conteúdo da página...') - content_data = extract_web_content(url) - content = content_data['content'] - metadata = {'title': content_data.get('title', 'Página Web'), 'webpage_url': url} - 
jobs[job_id]['progress'] = 50 - broadcast_progress(job_id, 50, 'processing', 'Conteúdo extraído') - - # Sumariza se solicitado - if should_summarize: - broadcast_progress(job_id, 60, 'processing', 'Gerando sumário...') - # Se já tem conteúdo extraído, usa ele. Senão, summarize_web_page extrai internamente - if content: - summary = summarize_text(content, model='gpt-4o-mini', language='pt-BR') - else: - summary = summarize_web_page(url) - # Se não tinha conteúdo antes, pega metadados da extração interna - if not metadata: - content_data = extract_web_content(url) - metadata = {'title': content_data.get('title', 'Página Web'), 'webpage_url': url} - - jobs[job_id]['progress'] = 80 - broadcast_progress(job_id, 80, 'processing', 'Sumário concluído') - - # Gera arquivo só com sumário se apenas sumarizar - if summary and not should_transcribe: - summary_path = Path(get_lazier_filename(OUTPUT_DIR, output_format, "_summary")) - export( - transcription="", - summary=summary, - metadata=metadata, - output_path=str(summary_path), - format_type=output_format - ) - - # Se apenas transcrever, gera arquivo só com transcrição - if should_transcribe and content and not should_summarize: - transcription_path = Path(get_lazier_filename(OUTPUT_DIR, output_format, "_transcription")) - export( - transcription=content, - summary=None, - metadata=metadata, - output_path=str(transcription_path), - format_type=output_format - ) - - # Salva cache apenas se extraiu conteúdo ou gerou sumário - if cache and (content or summary): - cache.set('web', url_hash, { - 'content': content, - 'summary': summary, - 'metadata': metadata, - 'timestamp': datetime.now().isoformat(), - }) - - # Gera arquivo consolidado apenas se ambos foram solicitados OU se apenas transcrever (sem arquivo separado) - should_generate_consolidated = False - if should_transcribe and should_summarize: - # Ambos solicitados: sempre gera consolidado - should_generate_consolidated = True - elif should_transcribe and not 
should_summarize: - # Apenas transcrever: gera consolidado se não tem arquivo separado - should_generate_consolidated = not transcription_path - elif should_summarize and not should_transcribe: - # Apenas sumarizar: NÃO gera consolidado (já tem arquivo separado) - should_generate_consolidated = False - - if should_generate_consolidated: - broadcast_progress(job_id, 90, 'processing', 'Gerando arquivo de saída...') - output_path = Path(get_lazier_filename(OUTPUT_DIR, output_format)) - - export( - transcription=content or "", - summary=summary if should_summarize else None, - metadata=metadata, - output_path=str(output_path), - format_type=output_format - ) - - jobs[job_id]['result_path'] = str(output_path) - - # Armazena dados separados - só armazena transcription se foi solicitado - jobs[job_id]['transcription'] = content if should_transcribe else None - jobs[job_id]['summary'] = summary - jobs[job_id]['transcription_path'] = str(transcription_path) if transcription_path else None - jobs[job_id]['summary_path'] = str(summary_path) if summary_path else None - jobs[job_id]['metadata'] = metadata - - jobs[job_id]['status'] = 'completed' - jobs[job_id]['progress'] = 100 - broadcast_progress(job_id, 100, 'completed', 'Processamento concluído') - - except Exception as e: - jobs[job_id]['status'] = 'failed' - jobs[job_id]['error'] = str(e) - broadcast_progress(job_id, 0, 'failed', f'Erro: {str(e)}') + job = _create_job( + mode=resolved_mode, + output_format=request.format, + source_name=request.url, + source_url=request.url, + input_type="url", + ) + background_tasks.add_task(_process_job, job["id"]) + return {"job_id": job["id"], "status": "processing", "mode": resolved_mode} @router.get("/jobs/{job_id}") async def get_job_status(job_id: str): - """Retorna status de um job""" - if job_id not in jobs: - raise HTTPException(status_code=404, detail="Job não encontrado") - - job = jobs[job_id] + """Retorna status de um job.""" + + job = get_job_store().get_job(job_id) + if 
not job: + raise HTTPException(status_code=404, detail="Job nao encontrado") + return { - "id": job['id'], - "status": job['status'], - "progress": job.get('progress', 0), - "result_path": job.get('result_path'), - "transcription_path": job.get('transcription_path'), - "summary_path": job.get('summary_path'), - "has_transcription": bool(job.get('transcription')), - "has_summary": bool(job.get('summary')), - "error": job.get('error'), + "id": job["id"], + "mode": job["mode"], + "status": job["status"], + "progress": job.get("progress", 0), + "result_path": job.get("result_path"), + "transcription_path": job.get("transcription_path"), + "summary_path": job.get("summary_path"), + "has_transcription": bool(job.get("transcription")), + "has_summary": bool(job.get("summary")), + "error": job.get("error"), } @router.get("/jobs/{job_id}/details") async def get_job_details(job_id: str): - """Retorna detalhes completos de um job (transcrição, sumário, metadados)""" - if job_id not in jobs: - raise HTTPException(status_code=404, detail="Job não encontrado") - - job = jobs[job_id] - if job['status'] != 'completed': - raise HTTPException(status_code=400, detail="Job ainda não concluído") - + """Retorna detalhes completos de um job.""" + + job = get_job_store().get_job(job_id) + if not job: + raise HTTPException(status_code=404, detail="Job nao encontrado") + + if job["status"] not in {"completed", "interrupted"}: + raise HTTPException(status_code=400, detail="Job ainda nao concluido") + return { - "id": job['id'], - "transcription": job.get('transcription'), - "summary": job.get('summary'), - "metadata": job.get('metadata', {}), - "format": job.get('format', 'docx'), - "result_path": job.get('result_path'), - "transcription_path": job.get('transcription_path'), - "summary_path": job.get('summary_path'), + "id": job["id"], + "mode": job["mode"], + "transcription": job.get("transcription"), + "summary": job.get("summary"), + "metadata": job.get("metadata", {}), + "format": 
job.get("format", "docx"), + "result_path": job.get("result_path"), + "transcription_path": job.get("transcription_path"), + "summary_path": job.get("summary_path"), } @router.get("/jobs/{job_id}/transcription") async def download_transcription(job_id: str): - """Download apenas da transcrição""" - if job_id not in jobs: - logger.error(f"Job {job_id} não encontrado para download de transcrição") - raise HTTPException(status_code=404, detail="Job não encontrado") - - job = jobs[job_id] - if job['status'] != 'completed': - raise HTTPException(status_code=400, detail="Job ainda não concluído") - - transcription_path = job.get('transcription_path') - if transcription_path and Path(transcription_path).exists(): - logger.info(f"Enviando transcrição de arquivo: {transcription_path}") - filename = Path(transcription_path).name - return FileResponse( - transcription_path, - media_type='application/octet-stream', - filename=filename, - headers={"Content-Disposition": f'attachment; filename="{filename}"'} - ) - - # Se não tem arquivo separado, gera um temporário - transcription = job.get('transcription') - if not transcription: - logger.error(f"Transcrição não disponível para job {job_id}") - raise HTTPException(status_code=404, detail="Transcrição não disponível") - - # Gera arquivo temporário - output_format = job.get('format', 'txt') - temp_path = OUTPUT_DIR / f"{job_id}_transcription_temp.{output_format}" - logger.info(f"Gerando arquivo temporário de transcrição: {temp_path}") - try: - export( - transcription=transcription, - summary=None, - metadata=job.get('metadata', {}), - output_path=str(temp_path), - format_type=output_format - ) - logger.info(f"Arquivo de transcrição gerado com sucesso: {temp_path}") - except Exception as e: - logger.error(f"Erro ao gerar arquivo de transcrição: {e}") - raise HTTPException(status_code=500, detail=f"Erro ao gerar arquivo: {str(e)}") - - filename = f"transcription.{output_format}" - return FileResponse( - str(temp_path), - 
media_type='application/octet-stream', - filename=filename, - headers={"Content-Disposition": f'attachment; filename="{filename}"'} - ) + """Download da transcricao.""" + + job = get_job_store().get_job(job_id) + if not job: + raise HTTPException(status_code=404, detail="Job nao encontrado") + + download_path = _ensure_download_file(job, "transcription") + if not download_path: + raise HTTPException(status_code=404, detail="Transcricao nao disponivel") + + filename = Path(download_path).name + return FileResponse(download_path, media_type="application/octet-stream", filename=filename) @router.get("/jobs/{job_id}/summary") async def download_summary(job_id: str): - """Download apenas do sumário""" - if job_id not in jobs: - logger.error(f"Job {job_id} não encontrado para download de sumário") - raise HTTPException(status_code=404, detail="Job não encontrado") - - job = jobs[job_id] - if job['status'] != 'completed': - raise HTTPException(status_code=400, detail="Job ainda não concluído") - - summary_path = job.get('summary_path') - if summary_path and Path(summary_path).exists(): - logger.info(f"Enviando sumário de arquivo: {summary_path}") - filename = Path(summary_path).name - return FileResponse( - summary_path, - media_type='application/octet-stream', - filename=filename, - headers={"Content-Disposition": f'attachment; filename="{filename}"'} - ) - - # Se não tem arquivo separado, gera um temporário - summary = job.get('summary') - if not summary: - logger.error(f"Sumário não disponível para job {job_id}") - raise HTTPException(status_code=404, detail="Sumário não disponível") - - # Gera arquivo temporário - output_format = job.get('format', 'txt') - temp_path = OUTPUT_DIR / f"{job_id}_summary_temp.{output_format}" - logger.info(f"Gerando arquivo temporário de sumário: {temp_path}") - try: - export( - transcription="", - summary=summary, - metadata=job.get('metadata', {}), - output_path=str(temp_path), - format_type=output_format - ) - logger.info(f"Arquivo de 
sumário gerado com sucesso: {temp_path}") - except Exception as e: - logger.error(f"Erro ao gerar arquivo de sumário: {e}") - raise HTTPException(status_code=500, detail=f"Erro ao gerar arquivo: {str(e)}") - - filename = f"summary.{output_format}" - return FileResponse( - str(temp_path), - media_type='application/octet-stream', - filename=filename, - headers={"Content-Disposition": f'attachment; filename="{filename}"'} - ) + """Download do sumario.""" + + job = get_job_store().get_job(job_id) + if not job: + raise HTTPException(status_code=404, detail="Job nao encontrado") + + download_path = _ensure_download_file(job, "summary") + if not download_path: + raise HTTPException(status_code=404, detail="Sumario nao disponivel") + + filename = Path(download_path).name + return FileResponse(download_path, media_type="application/octet-stream", filename=filename) @router.get("/jobs/{job_id}/download") async def download_result(job_id: str): - """Download do resultado de um job""" - if job_id not in jobs: - logger.error(f"Job {job_id} não encontrado") - raise HTTPException(status_code=404, detail="Job não encontrado") - - job = jobs[job_id] - if job['status'] != 'completed': - logger.warning(f"Job {job_id} ainda não concluído, status: {job['status']}") - raise HTTPException(status_code=400, detail="Job ainda não concluído") - - result_path = job.get('result_path') - logger.info(f"Tentando baixar job {job_id}, result_path: {result_path}") - - if not result_path: - logger.error(f"Job {job_id} não tem result_path definido") - # Tentar gerar arquivo se não existir mas job está completo - if job.get('transcription') or job.get('summary'): - logger.info(f"Regenerando arquivo para job {job_id}") - output_format = job.get('format', 'docx') - output_path = Path(get_lazier_filename(OUTPUT_DIR, output_format)) - try: - export( - transcription=job.get('transcription') or "", - summary=job.get('summary') if job.get('summarize') else None, - metadata=job.get('metadata', {}), - 
output_path=str(output_path), - format_type=output_format - ) - result_path = str(output_path) - job['result_path'] = result_path - logger.info(f"Arquivo regenerado: {result_path}") - except Exception as e: - logger.error(f"Erro ao regenerar arquivo: {e}") - raise HTTPException(status_code=500, detail=f"Erro ao gerar arquivo: {str(e)}") - else: - raise HTTPException(status_code=404, detail="Arquivo de resultado não encontrado e não há dados para regenerar") - - result_path_obj = Path(result_path) - if not result_path_obj.exists(): - logger.error(f"Arquivo não existe: {result_path}") - # Tentar caminho absoluto se relativo falhou - if not result_path_obj.is_absolute(): - result_path_obj = OUTPUT_DIR / result_path_obj.name - if result_path_obj.exists(): - logger.info(f"Arquivo encontrado em caminho alternativo: {result_path_obj}") - result_path = str(result_path_obj) - else: - raise HTTPException(status_code=404, detail=f"Arquivo de resultado não encontrado: {result_path}") - else: - raise HTTPException(status_code=404, detail=f"Arquivo de resultado não encontrado: {result_path}") - - logger.info(f"Enviando arquivo: {result_path}") - filename = result_path_obj.name - return FileResponse( - result_path, - media_type='application/octet-stream', - filename=filename, - headers={"Content-Disposition": f'attachment; filename="{filename}"'} - ) + """Download do artefato principal do job.""" + + job = get_job_store().get_job(job_id) + if not job: + raise HTTPException(status_code=404, detail="Job nao encontrado") + download_path = _ensure_download_file(job, "result") + if not download_path: + raise HTTPException(status_code=404, detail="Arquivo de resultado nao encontrado") -def _get_job_title(job: dict) -> str: - """Extrai título descritivo de um job""" - metadata = job.get('metadata', {}) - - # Prioridade 1: Título dos metadados (YouTube, PDF, Web) - if metadata.get('title'): - return metadata['title'] - - # Prioridade 2: Nome do arquivo (sem extensão) - if 
job.get('file_path'): - return Path(job['file_path']).stem - - # Prioridade 3: URL (para YouTube/Web sem título) - if job.get('url'): - # Para YouTube, pode tentar extrair título da URL - if 'youtube.com' in job['url'] or 'youtu.be' in job['url']: - return "Vídeo do YouTube" - return job['url'] - - # Fallback - return f"Job {job.get('id', 'desconhecido')}" + filename = Path(download_path).name + return FileResponse(download_path, media_type="application/octet-stream", filename=filename) @router.get("/history") async def get_history(): - """Retorna histórico de jobs com títulos descritivos""" - # Formata jobs para incluir campos necessários - formatted_jobs = [] - for job in jobs.values(): - formatted_job = { - 'id': job.get('id'), - 'status': job.get('status'), - 'progress': job.get('progress', 0), - 'title': _get_job_title(job), # Título descritivo - 'url': job.get('url'), - 'file_path': job.get('file_path'), - 'format': job.get('format', 'docx'), - 'result_path': job.get('result_path'), - 'transcription_path': job.get('transcription_path'), - 'summary_path': job.get('summary_path'), - 'has_transcription': bool(job.get('transcription')), - 'has_summary': bool(job.get('summary')), - 'error': job.get('error'), - 'created_at': job.get('created_at'), - } - formatted_jobs.append(formatted_job) - - return {"jobs": formatted_jobs} - - -@router.delete("/cache") -async def clear_cache(): - """Limpa cache""" - cache = get_cache_manager() - if cache: - count = cache.clear_all() - return {"message": f"Cache limpo: {count} chaves removidas"} - return {"message": "Cache não disponível"} + """Retorna historico persistido de jobs.""" + + jobs = [] + for job in get_job_store().list_jobs(): + jobs.append( + { + "id": job["id"], + "mode": job["mode"], + "status": job["status"], + "progress": job.get("progress", 0), + "title": _job_title(job), + "url": job.get("source_url"), + "file_path": job.get("file_path"), + "format": job.get("format", "docx"), + "result_path": 
job.get("result_path"), + "transcription_path": job.get("transcription_path"), + "summary_path": job.get("summary_path"), + "has_transcription": bool(job.get("transcription")), + "has_summary": bool(job.get("summary")), + "error": job.get("error"), + "created_at": job.get("created_at"), + } + ) + return {"jobs": jobs} @router.get("/supported-sites") async def get_supported_sites(): - """Retorna lista de sites que podem ser processados (vídeo/áudio via yt-dlp)""" + """Retorna lista de sites suportados.""" + return {"sites": SUPPORTED_VIDEO_SITES} diff --git a/lazier/cli.py b/lazier/cli.py @@ -1,426 +1,213 @@ """ -Interface de linha de comando (CLI) usando Click +Interface de linha de comando (CLI) usando Click. """ import os import sys +import uuid from pathlib import Path from typing import Optional -from datetime import datetime + import click -from dotenv import load_dotenv +try: + from dotenv import load_dotenv +except ImportError: # pragma: no cover - ambiente sem python-dotenv + def load_dotenv(): + return False from rich.console import Console -from rich.progress import Progress, SpinnerColumn, BarColumn, TextColumn, TimeElapsedColumn from rich.panel import Panel +from rich.progress import BarColumn, Progress, SpinnerColumn, TextColumn, TimeElapsedColumn -from .utils import ( - validate_input, - cleanup_files, - get_output_filename, - check_ffmpeg -) -from .downloader import download_youtube_audio, download_video_audio -from .audio_processor import prepare_audio_file -from .transcriber import transcribe_audio -from .summarizer import summarize_text, summarize_text_file, summarize_web_page, summarize_pdf -from .core.formats import export from .core.playlist import is_playlist_url, process_playlist -from .core.cache import get_cache_manager, calculate_file_hash, calculate_url_hash +from .core.processing import process_source +from .core.cache import get_cache_manager from .core.exceptions import MusicContentError -from .web.extractor import extract_pdf_content, 
extract_text_file_content +from .utils import validate_input load_dotenv() console = Console() -@click.group(invoke_without_command=True) -@click.pass_context -@click.version_option(version='0.01', prog_name='lazier') -@click.argument('input_path', required=False) -@click.option('--output', '-o', type=str, help='Nome do arquivo de saída') -@click.option('--format', '-f', type=click.Choice(['docx', 'txt', 'md', 'json', 'pdf']), default='docx', help='Formato de saída (padrão: docx)') -@click.option('--language', '-l', default='pt', help='Idioma para transcrição (padrão: pt)') -@click.option('--model', default='whisper-1', help='Modelo Whisper (padrão: whisper-1)') -@click.option('--gpt-model', default='gpt-4o-mini', help='Modelo GPT para sumarização (padrão: gpt-4o-mini)') -@click.option('--keep-files', is_flag=True, help='Não deletar arquivos temporários') -@click.option('--only-audio', is_flag=True, help='Processar apenas áudio (para vídeos)') -def cli(ctx, input_path, output, format, language, model, gpt_model, keep_files, only_audio): - """ - Lazier CLI - """ - if ctx.invoked_subcommand is None: - if input_path: - # Comando padrão: transcreve e sumariza - process_input( - input_path=input_path, - output=output, - format_type=format, - language=language, - model=model, - gpt_model=gpt_model, - keep_files=keep_files, - only_audio=only_audio, - should_summarize=True - ) - else: - # Mostra ajuda se não há argumentos - click.echo(ctx.get_help()) - - -@cli.command() -@click.argument('input_path', type=str) -@click.option('--output', '-o', type=str, help='Nome do arquivo de saída') -@click.option('--format', '-f', type=click.Choice(['docx', 'txt', 'md', 'json', 'pdf']), default='docx', help='Formato de saída') -@click.option('--language', '-l', default='pt', help='Idioma para transcrição (padrão: pt)') -@click.option('--model', default='whisper-1', help='Modelo Whisper (padrão: whisper-1)') -@click.option('--keep-files', is_flag=True, help='Não deletar arquivos 
temporários') -@click.option('--only-audio', is_flag=True, help='Processar apenas áudio (para vídeos)') -def transcribe(input_path: str, output: str, format: str, language: str, model: str, keep_files: bool, only_audio: bool): - """Transcreve áudio/vídeo sem sumarizar""" - process_input( - input_path=input_path, - output=output, - format_type=format, - language=language, - model=model, - keep_files=keep_files, - only_audio=only_audio, - should_summarize=False - ) - - -@cli.command() -@click.argument('input_path', type=str) -@click.option('--output', '-o', type=str, help='Nome do arquivo DOCX de saída') -@click.option('--language', '-l', default='pt', help='Idioma para transcrição (padrão: pt)') -@click.option('--model', default='whisper-1', help='Modelo Whisper (padrão: whisper-1)') -@click.option('--gpt-model', default='gpt-4o-mini', help='Modelo GPT para sumarização (padrão: gpt-4o-mini)') -@click.option('--keep-files', is_flag=True, help='Não deletar arquivos temporários') -@click.option('--only-audio', is_flag=True, help='Processar apenas áudio (para vídeos)') -def summarize(input_path: str, output: str, language: str, model: str, gpt_model: str, keep_files: bool, only_audio: bool): - """Apenas sumariza (requer arquivo de texto ou transcrição prévia)""" - click.echo("Erro: Comando 'summarize' requer texto pré-transcrito.") - click.echo("Use 'lazier <input>' para transcrição + sumarização ou 'lazier transcribe' para apenas transcrição.") - sys.exit(1) - +def _progress_notifier(progress_bar, task_id): + def callback(progress: int, _status: str, message: Optional[str] = None): + if message: + progress_bar.update(task_id, description=f"[cyan]{message}") + progress_bar.update(task_id, completed=progress) + return callback -def process_input( +def _run_mode( + *, input_path: str, - output: Optional[str] = None, - format_type: str = 'docx', - language: str = 'pt', - model: str = 'whisper-1', - gpt_model: str = 'gpt-4o-mini', - keep_files: bool = False, - only_audio: 
bool = False, - should_summarize: bool = True + mode: str, + output: Optional[str], + format_type: str, + model: str, + gpt_model: str, ): - """Processa o input (arquivo ou URL) com progress bars""" - with Progress( SpinnerColumn(), TextColumn("[progress.description]{task.description}"), BarColumn(), TextColumn("[progress.percentage]{task.percentage:>3.0f}%"), TimeElapsedColumn(), - console=console + console=console, ) as progress: - - # Validação inicial - task1 = progress.add_task("[cyan]Validando input...", total=100) + task = progress.add_task("[cyan]Preparando processamento...", total=100) + + if not os.getenv("OPENAI_API_KEY"): + console.print("[red]Erro: OPENAI_API_KEY nao encontrada.[/red]") + sys.exit(1) + is_valid, input_type, error_msg = validate_input(input_path) - if not is_valid: console.print(f"[red]Erro: {error_msg}[/red]") sys.exit(1) - - # Verifica API key - if not os.getenv('OPENAI_API_KEY'): - console.print("[red]Erro: OPENAI_API_KEY não encontrada.[/red]") - console.print("Configure a variável de ambiente ou crie um arquivo .env") - sys.exit(1) - - progress.update(task1, completed=100) - - # Verifica cache - cache = None - try: - cache = get_cache_manager() - except: - pass # Cache opcional - - files_to_cleanup = [] - metadata = {} - transcription = None - summary = None - web_video_url_hash = None - + + if input_type == "youtube" and is_playlist_url(input_path): + playlist_results = process_playlist(input_path, parallel=False) + console.print( + f"[green]Playlist processada: {playlist_results['processed']}/{playlist_results['total_videos']} videos[/green]" + ) + return + try: - # Verifica se é playlist - if input_type == 'youtube' and is_playlist_url(input_path): - task2 = progress.add_task("[yellow]Processando playlist...", total=100) - console.print(f"[yellow]Playlist detectada: {input_path}[/yellow]") - playlist_results = process_playlist(input_path, parallel=False) - console.print(f"[green]Playlist processada: 
{playlist_results['processed']}/{playlist_results['total_videos']} vídeos[/green]") - progress.update(task2, completed=100) - return - - # Determina arquivo a processar - task3 = progress.add_task("[cyan]Preparando arquivo...", total=100) - audio_file = None - content_data = None - - if input_type == 'youtube': - progress.update(task3, description="[cyan]Baixando vídeo do YouTube...") - url_hash = calculate_url_hash(input_path) if cache else None - - # Verifica cache - if cache: - cached = cache.get('youtube', url_hash) - if cached: - transcription = cached.get('transcription') - summary = cached.get('summary') if should_summarize else None - metadata = cached.get('metadata', {}) - console.print("[green]✓[/green] Usando cache") - progress.update(task3, completed=100) - else: - audio_file, metadata = download_youtube_audio(input_path) - files_to_cleanup.append(audio_file) - progress.update(task3, completed=50) - else: - audio_file, metadata = download_youtube_audio(input_path) - files_to_cleanup.append(audio_file) - progress.update(task3, completed=50) - - elif input_type == 'video': - progress.update(task3, description="[cyan]Extraindo áudio do vídeo...") - audio_file = prepare_audio_file(input_path, is_video=True) - if audio_file != input_path: - files_to_cleanup.append(audio_file) - progress.update(task3, completed=100) - - elif input_type == 'audio': - progress.update(task3, description="[cyan]Preparando áudio...") - audio_file = prepare_audio_file(input_path, is_video=False) - if audio_file != input_path: - files_to_cleanup.append(audio_file) - progress.update(task3, completed=100) - - elif input_type == 'web': - progress.update(task3, description="[cyan]Tentando extrair vídeo/áudio da URL...") - web_video_url_hash = calculate_url_hash(input_path) if cache else None - try: - audio_file, metadata = download_video_audio(input_path) - files_to_cleanup.append(audio_file) - if cache and web_video_url_hash: - cached = cache.get('video', web_video_url_hash) - if 
cached: - transcription = cached.get('transcription') - summary = cached.get('summary') if should_summarize else None - metadata = cached.get('metadata', {}) - console.print("[green]✓[/green] Usando cache") - audio_file = None - progress.update(task3, completed=100) - except MusicContentError: - console.print("[red]Conteúdo detectado como música não é processado pelo Lazier.[/red]") - cleanup_files(files_to_cleanup) - sys.exit(1) - except Exception: - progress.update(task3, description="[cyan]Extraindo texto da página web...") - content_data = extract_web_content(input_path) - metadata = {'title': content_data.get('title', 'Página Web'), 'file_path': input_path} - transcription = content_data['content'] - summary = summarize_web_page(input_path, model=gpt_model, language='pt-BR') if should_summarize else None - progress.update(task3, completed=100) - audio_file = None - - # Processa texto/PDF/web (arquivos locais) - elif Path(input_path).suffix.lower() == '.pdf': - progress.update(task3, description="[cyan]Extraindo texto do PDF...") - content_data = extract_pdf_content(input_path) - metadata = {'title': content_data.get('title', 'PDF'), 'file_path': input_path} - progress.update(task3, completed=100) - - elif Path(input_path).suffix.lower() in ['.txt', '.md', '.html']: - progress.update(task3, description="[cyan]Lendo arquivo de texto...") - content_data = extract_text_file_content(input_path) - metadata = {'title': content_data.get('title', 'Texto'), 'file_path': input_path} - progress.update(task3, completed=100) - - # Transcrição (para áudio/vídeo) - if audio_file: - task4 = progress.add_task("[magenta]Transcrevendo áudio...", total=100) - - if not transcription: - file_hash = calculate_file_hash(audio_file) if cache else None - - # Verifica cache - if cache and file_hash: - cached = cache.get('transcription', file_hash) - if cached: - transcription = cached.get('transcription') - metadata = cached.get('metadata', metadata) - console.print("[green]✓[/green] 
Transcrição do cache") - progress.update(task4, completed=100) - else: - transcription = transcribe_audio(audio_file, language=language, model=model) - progress.update(task4, completed=100) - - # Salva cache - if cache: - cache.set('transcription', file_hash, { - 'transcription': transcription, - 'metadata': metadata, - 'timestamp': datetime.now().isoformat(), - }) - else: - transcription = transcribe_audio(audio_file, language=language, model=model) - progress.update(task4, completed=100) - - # Sumarização - if should_summarize: - task5 = progress.add_task("[green]Gerando sumário...", total=100) - - if transcription: - # Sumariza transcrição - text_hash = calculate_url_hash(transcription) if cache else None - - if cache and text_hash: - cached = cache.get('summary', text_hash) - if cached: - summary = cached.get('summary') - console.print("[green]✓[/green] Sumário do cache") - progress.update(task5, completed=100) - else: - summary = summarize_text(transcription, model=gpt_model, language='pt-BR') - progress.update(task5, completed=100) - - if cache: - cache.set('summary', text_hash, { - 'summary': summary, - 'timestamp': datetime.now().isoformat(), - }) - else: - summary = summarize_text(transcription, model=gpt_model, language='pt-BR') - progress.update(task5, completed=100) - - elif content_data: - # Sumariza conteúdo extraído - if Path(input_path).suffix.lower() == '.pdf': - summary = summarize_pdf(input_path, model=gpt_model, language='pt-BR') - else: - summary = summarize_text_file(input_path, model=gpt_model, language='pt-BR') - transcription = content_data['content'] - progress.update(task5, completed=100) - - if web_video_url_hash and cache and transcription: - cache.set('video', web_video_url_hash, { - 'transcription': transcription, - 'summary': summary, - 'metadata': metadata, - 'timestamp': datetime.now().isoformat(), - }) - - # Gera arquivo de saída - task6 = progress.add_task(f"[blue]Gerando arquivo {format_type.upper()}...", total=100) - - # 
Garantir link do vídeo no cabeçalho quando o input é URL (YouTube ou vídeo genérico) - if metadata is None: - metadata = {} - if not metadata.get('webpage_url') and input_path.strip().startswith(('http://', 'https://')): - metadata = {**metadata, 'webpage_url': input_path.strip()} - - if not output: - output = get_output_filename(input_path) - # Adiciona extensão correta - output_path_obj = Path(output) - if output_path_obj.suffix != f'.{format_type}': - output = str(output_path_obj.with_suffix(f'.{format_type}')) - - export( - transcription=transcription or content_data['content'] if content_data else "", - summary=summary if should_summarize else None, - metadata=metadata, + result = process_source( + input_path, + mode=mode, + output_format=format_type, + model=model, + gpt_model=gpt_model, output_path=output, - format_type=format_type + run_id=str(uuid.uuid4()), + source_name=Path(input_path).name if not input_path.startswith(("http://", "https://")) else input_path, + progress_callback=_progress_notifier(progress, task), ) - - progress.update(task6, completed=100) - - console.print(f"\n[bold green]✓ Processamento concluído![/bold green]") - console.print(f"[cyan]Arquivo gerado:[/cyan] {output}") - - # Limpeza - if not keep_files: - cleanup_files(files_to_cleanup) - except MusicContentError: - console.print("[red]Conteúdo detectado como música não é processado pelo Lazier.[/red]") - cleanup_files(files_to_cleanup) + console.print("[red]Conteudo detectado como musica nao e processado pelo Lazier.[/red]") sys.exit(1) except KeyboardInterrupt: - console.print("\n[yellow]Operação cancelada pelo usuário.[/yellow]") - cleanup_files(files_to_cleanup) + console.print("\n[yellow]Operacao cancelada pelo usuario.[/yellow]") sys.exit(1) - except Exception as e: - console.print(f"\n[red]Erro durante processamento:[/red] {str(e)}") - cleanup_files(files_to_cleanup) + except Exception as exc: + console.print(f"\n[red]Erro durante processamento:[/red] {exc}") sys.exit(1) + 
console.print(f"\n[bold green]✓ Processamento concluido![/bold green]") + console.print(f"[cyan]Arquivo gerado:[/cyan] {result['result_path']}") + + +@click.group(invoke_without_command=True) +@click.pass_context +@click.version_option(version="0.01", prog_name="lazier") +@click.argument("input_path", required=False) +def cli(ctx, input_path): + """ + Lazier CLI. + """ + if ctx.invoked_subcommand is None: + if input_path: + console.print( + Panel.fit( + "Use comandos explicitos:\n\n" + "`lazier transcribe <input>` para transcrever em portugues\n" + "`lazier summarize <input>` para gerar um sumario em portugues", + title="Modo de Uso", + ) + ) + click.echo(ctx.get_help()) + @cli.command() -@click.option('--port', '-p', default=19283, help='Porta do servidor (padrão: 19283)') -@click.option('--host', default='0.0.0.0', help='Host do servidor (padrão: 0.0.0.0)') +@click.argument("input_path", type=str) +@click.option("--output", "-o", type=str, help="Nome do arquivo de saida") +@click.option("--format", "-f", "format_type", type=click.Choice(["docx", "txt", "md", "json", "pdf"]), default="docx", help="Formato de saida") +@click.option("--model", default="whisper-1", help="Modelo Whisper (padrao: whisper-1)") +@click.option("--gpt-model", default="gpt-4o-mini", help="Modelo GPT para conversao ao portugues (padrao: gpt-4o-mini)") +def transcribe(input_path: str, output: Optional[str], format_type: str, model: str, gpt_model: str): + """Transcreve ou converte o conteudo para portugues.""" + _run_mode( + input_path=input_path, + mode="transcribe", + output=output, + format_type=format_type, + model=model, + gpt_model=gpt_model, + ) + + +@cli.command() +@click.argument("input_path", type=str) +@click.option("--output", "-o", type=str, help="Nome do arquivo de saida") +@click.option("--format", "-f", "format_type", type=click.Choice(["docx", "txt", "md", "json", "pdf"]), default="docx", help="Formato de saida") +@click.option("--model", default="whisper-1", help="Modelo 
Whisper (padrao: whisper-1)") +@click.option("--gpt-model", default="gpt-4o-mini", help="Modelo GPT para sumario (padrao: gpt-4o-mini)") +def summarize(input_path: str, output: Optional[str], format_type: str, model: str, gpt_model: str): + """Gera um sumario em portugues do conteudo informado.""" + _run_mode( + input_path=input_path, + mode="summarize", + output=output, + format_type=format_type, + model=model, + gpt_model=gpt_model, + ) + + +@cli.command() +@click.option("--port", "-p", default=19283, help="Porta do servidor (padrao: 19283)") +@click.option("--host", default="0.0.0.0", help="Host do servidor (padrao: 0.0.0.0)") def web(port: int, host: str): - """Inicia servidor web FastAPI""" + """Inicia servidor web FastAPI.""" import uvicorn from .api.main import app - - console.print(Panel.fit( - f"[bold green]Lazier WebGUI[/bold green]\n\n" - f"Servidor iniciando em [cyan]http://{host}:{port}[/cyan]\n" - f"Pressione Ctrl+C para parar", - title="🚀 Servidor Web" - )) - + + console.print( + Panel.fit( + f"[bold green]Lazier WebGUI[/bold green]\n\n" + f"Servidor iniciando em [cyan]http://{host}:{port}[/cyan]\n" + "Pressione Ctrl+C para parar", + title="Servidor Web", + ) + ) + uvicorn.run(app, host=host, port=port) @cli.command() def cache(): - """Comandos de cache""" + """Comandos de cache.""" pass @cache.command() def clear(): - """Limpa todo o cache""" + """Limpa todo o cache.""" try: - from .core.cache import get_cache_manager - cache = get_cache_manager() - count = cache.clear_all() + cache_manager = get_cache_manager() + count = cache_manager.clear_all() console.print(f"[green]Cache limpo: {count} chaves removidas[/green]") - except Exception as e: - console.print(f"[red]Erro ao limpar cache: {e}[/red]") + except Exception as exc: + console.print(f"[red]Erro ao limpar cache: {exc}[/red]") @cache.command() def stats(): - """Mostra estatísticas do cache""" + """Mostra estatisticas do cache.""" try: - from .core.cache import get_cache_manager - cache = 
get_cache_manager() - stats = cache.stats() - console.print(Panel.fit( - f"[bold]Estatísticas do Cache[/bold]\n\n" - f"Total de chaves: [cyan]{stats.get('total_keys', 0)}[/cyan]\n" - f"Memória usada: [cyan]{stats.get('memory_used', 'N/A')}[/cyan]\n" - f"Memória pico: [cyan]{stats.get('memory_peak', 'N/A')}[/cyan]\n" - f"Clientes conectados: [cyan]{stats.get('connected_clients', 0)}[/cyan]", - title="📊 Cache Redis" - )) - except Exception as e: - console.print(f"[red]Erro ao obter estatísticas: {e}[/red]") + cache_manager = get_cache_manager() + stats_data = cache_manager.stats() + console.print( + Panel.fit( + f"[bold]Estatisticas do Cache[/bold]\n\n" + f"Total de chaves: [cyan]{stats_data.get('total_keys', 0)}[/cyan]\n" + f"Memoria usada: [cyan]{stats_data.get('memory_used', 'N/A')}[/cyan]\n" + f"Memoria pico: [cyan]{stats_data.get('memory_peak', 'N/A')}[/cyan]\n" + f"Clientes conectados: [cyan]{stats_data.get('connected_clients', 0)}[/cyan]", + title="Cache Redis", + ) + ) + except Exception as exc: + console.print(f"[red]Erro ao obter estatisticas: {exc}[/red]") diff --git a/lazier/core/auth.py b/lazier/core/auth.py @@ -8,7 +8,11 @@ from pathlib import Path from typing import Optional from datetime import datetime from passlib.context import CryptContext -from dotenv import load_dotenv +try: + from dotenv import load_dotenv +except ImportError: # pragma: no cover - ambiente sem python-dotenv + def load_dotenv(): + return False # Monkey patch para evitar erro durante detect_wrap_bug do passlib # O passlib tenta detectar um bug do bcrypt usando uma senha de teste que pode exceder 72 bytes diff --git a/lazier/core/cache.py b/lazier/core/cache.py @@ -7,9 +7,22 @@ import json import hashlib from typing import Optional, Dict, Any, Union from datetime import timedelta -import redis -from redis.exceptions import ConnectionError, RedisError -from dotenv import load_dotenv +try: + import redis + from redis.exceptions import ConnectionError, RedisError +except ImportError: 
# pragma: no cover - ambiente sem redis instalado + redis = None + + class ConnectionError(Exception): + pass + + class RedisError(Exception): + pass +try: + from dotenv import load_dotenv +except ImportError: # pragma: no cover - ambiente sem python-dotenv + def load_dotenv(): + return False load_dotenv() @@ -19,6 +32,8 @@ class CacheManager: def __init__(self): """Inicializa conexão com Redis""" + if redis is None: + raise Exception("Redis não está instalado neste ambiente.") self.redis_host = os.getenv('REDIS_HOST', 'localhost') self.redis_port = int(os.getenv('REDIS_PORT', 6379)) self.redis_db = int(os.getenv('REDIS_DB', 0)) diff --git a/lazier/core/formats.py b/lazier/core/formats.py @@ -6,9 +6,13 @@ import json from datetime import datetime from pathlib import Path from typing import Optional, Dict, Any -from docx import Document -from docx.shared import Pt, Inches, RGBColor -from docx.enum.text import WD_ALIGN_PARAGRAPH +try: + from docx import Document # noqa: F401 + from docx.shared import Pt, Inches, RGBColor # noqa: F401 + from docx.enum.text import WD_ALIGN_PARAGRAPH # noqa: F401 +except ImportError: # pragma: no cover - ambiente sem python-docx + Document = None + Pt = Inches = RGBColor = WD_ALIGN_PARAGRAPH = None from ..docx_generator import _format_duration from ..utils import sanitize_xml_string @@ -82,18 +86,19 @@ def export_txt( lines.append("") # Transcrição - lines.append("TRANSCRIÇÃO COMPLETA") - lines.append("-" * 80) - - # Divide transcrição em parágrafos e preserva estrutura - transcription_paragraphs = transcription.split('\n\n') - if len(transcription_paragraphs) == 1: - transcription_paragraphs = transcription.split('\n') - - for para in transcription_paragraphs: - if para.strip(): - lines.append(para.strip()) - lines.append("") # Linha em branco entre parágrafos + if transcription and transcription.strip(): + lines.append("TRANSCRIÇÃO COMPLETA") + lines.append("-" * 80) + + # Divide transcrição em parágrafos e preserva estrutura + 
transcription_paragraphs = transcription.split('\n\n') + if len(transcription_paragraphs) == 1: + transcription_paragraphs = transcription.split('\n') + + for para in transcription_paragraphs: + if para.strip(): + lines.append(para.strip()) + lines.append("") # Linha em branco entre parágrafos # Salva arquivo with open(output_path_obj, 'w', encoding='utf-8') as f: @@ -168,18 +173,19 @@ def export_markdown( lines.append("") # Transcrição - lines.append("## Transcrição Completa") - lines.append("") - - # Divide transcrição em parágrafos e formata corretamente - transcription_paragraphs = transcription.split('\n\n') - if len(transcription_paragraphs) == 1: - transcription_paragraphs = transcription.split('\n') - - for para in transcription_paragraphs: - if para.strip(): - lines.append(para.strip()) - lines.append("") # Linha em branco entre parágrafos (Markdown requer) + if transcription and transcription.strip(): + lines.append("## Transcrição Completa") + lines.append("") + + # Divide transcrição em parágrafos e formata corretamente + transcription_paragraphs = transcription.split('\n\n') + if len(transcription_paragraphs) == 1: + transcription_paragraphs = transcription.split('\n') + + for para in transcription_paragraphs: + if para.strip(): + lines.append(para.strip()) + lines.append("") # Linha em branco entre parágrafos (Markdown requer) # Salva arquivo with open(output_path_obj, 'w', encoding='utf-8') as f: @@ -217,7 +223,7 @@ def export_json( 'uploader': metadata.get('uploader') if metadata else None, 'webpage_url': metadata.get('webpage_url') if metadata else None, }, - 'transcription': transcription, + 'transcription': transcription if transcription and transcription.strip() else None, 'summary': summary, } @@ -325,14 +331,15 @@ def export_pdf( story.append(Paragraph(text, body_style)) story.append(Spacer(1, 16)) - story.append(Paragraph("Transcrição Completa", heading_style)) - story.append(Spacer(1, 8)) transcription_safe = 
sanitize_xml_string(transcription or "") - for para in (transcription_safe.split('\n\n') if transcription_safe else []): - if not para.strip(): - continue - text = para.strip().replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('\n', '<br/>').replace('**', '') - story.append(Paragraph(text, body_style)) + if transcription_safe.strip(): + story.append(Paragraph("Transcrição Completa", heading_style)) + story.append(Spacer(1, 8)) + for para in transcription_safe.split('\n\n'): + if not para.strip(): + continue + text = para.strip().replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('\n', '<br/>').replace('**', '') + story.append(Paragraph(text, body_style)) doc.build(story) return str(output_path_obj) diff --git a/lazier/core/jobs.py b/lazier/core/jobs.py @@ -0,0 +1,274 @@ +""" +Persistencia de jobs e organizacao de arquivos de saida. +""" + +import json +import os +import re +import sqlite3 +import threading +import unicodedata +from datetime import datetime +from pathlib import Path +from typing import Any, Dict, List, Optional, Union + + +TERMINAL_STATUSES = {"completed", "failed", "interrupted"} +ARTIFACT_FILENAMES = { + "transcription": "transcricao", + "summary": "sumario", + "result": "resultado", +} + + +def _coerce_datetime(value: Optional[Union[str, datetime]]) -> datetime: + if isinstance(value, datetime): + return value + if isinstance(value, str) and value: + return datetime.fromisoformat(value) + return datetime.now() + + +def get_jobs_db_path() -> Path: + data_dir = Path(os.getenv("LAZIER_DATA_DIR", "/app/data")) + data_dir.mkdir(parents=True, exist_ok=True) + return data_dir / "jobs.db" + + +def get_outputs_root() -> Path: + output_dir = Path(os.getenv("LAZIER_OUTPUT_DIR", "/app/outputs")) + output_dir.mkdir(parents=True, exist_ok=True) + return output_dir + + +def slugify_source_name(source_name: Optional[str], fallback: str = "job") -> str: + base = source_name or fallback + base = Path(base).stem + 
normalized = unicodedata.normalize("NFKD", base) + ascii_only = normalized.encode("ascii", "ignore").decode("ascii") + ascii_only = ascii_only.lower() + ascii_only = re.sub(r"[^a-z0-9]+", "-", ascii_only).strip("-") + return ascii_only[:40] or fallback + + +def build_job_output_dir( + job_id: str, + source_name: Optional[str] = None, + created_at: Optional[Union[str, datetime]] = None, + output_root: Optional[Path] = None, +) -> Path: + timestamp = _coerce_datetime(created_at) + root = output_root or get_outputs_root() + slug = slugify_source_name(source_name, fallback="job") + folder_name = f"{slug}-{job_id[:8]}" + return root / timestamp.strftime("%Y") / timestamp.strftime("%m") / timestamp.strftime("%d") / folder_name + + +def build_job_artifact_path( + job_id: str, + source_name: Optional[str], + format_type: str, + artifact_kind: str, + created_at: Optional[Union[str, datetime]] = None, + output_root: Optional[Path] = None, +) -> Path: + if artifact_kind not in ARTIFACT_FILENAMES: + raise ValueError(f"Artefato nao suportado: {artifact_kind}") + + output_dir = build_job_output_dir( + job_id=job_id, + source_name=source_name, + created_at=created_at, + output_root=output_root, + ) + output_dir.mkdir(parents=True, exist_ok=True) + filename = ARTIFACT_FILENAMES[artifact_kind] + return output_dir / f"{filename}.{format_type}" + + +class JobStore: + """Repositorio SQLite para jobs da WebGUI.""" + + def __init__(self, db_path: Optional[Path] = None): + self.db_path = Path(db_path or get_jobs_db_path()) + self.db_path.parent.mkdir(parents=True, exist_ok=True) + self._lock = threading.Lock() + self.init_db() + + def _connect(self) -> sqlite3.Connection: + connection = sqlite3.connect(self.db_path) + connection.row_factory = sqlite3.Row + return connection + + def init_db(self) -> None: + with self._connect() as conn: + conn.execute( + """ + CREATE TABLE IF NOT EXISTS jobs ( + id TEXT PRIMARY KEY, + mode TEXT NOT NULL, + status TEXT NOT NULL, + progress INTEGER NOT NULL 
DEFAULT 0, + input_type TEXT, + source_name TEXT, + source_url TEXT, + file_path TEXT, + format TEXT NOT NULL, + metadata_json TEXT NOT NULL DEFAULT '{}', + transcription TEXT, + summary TEXT, + result_path TEXT, + transcription_path TEXT, + summary_path TEXT, + error TEXT, + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL, + completed_at TEXT + ) + """ + ) + conn.commit() + + def reset(self) -> None: + with self._connect() as conn: + conn.execute("DELETE FROM jobs") + conn.commit() + + def create_job(self, job_data: Dict[str, Any]) -> Dict[str, Any]: + now = datetime.now().isoformat() + payload = { + "id": job_data["id"], + "mode": job_data["mode"], + "status": job_data.get("status", "pending"), + "progress": int(job_data.get("progress", 0)), + "input_type": job_data.get("input_type"), + "source_name": job_data.get("source_name"), + "source_url": job_data.get("source_url"), + "file_path": job_data.get("file_path"), + "format": job_data.get("format", "docx"), + "metadata_json": json.dumps(job_data.get("metadata", {}), ensure_ascii=False), + "transcription": job_data.get("transcription"), + "summary": job_data.get("summary"), + "result_path": job_data.get("result_path"), + "transcription_path": job_data.get("transcription_path"), + "summary_path": job_data.get("summary_path"), + "error": job_data.get("error"), + "created_at": job_data.get("created_at", now), + "updated_at": job_data.get("updated_at", job_data.get("created_at", now)), + "completed_at": job_data.get("completed_at"), + } + + with self._lock, self._connect() as conn: + conn.execute( + """ + INSERT INTO jobs ( + id, mode, status, progress, input_type, source_name, source_url, file_path, + format, metadata_json, transcription, summary, result_path, + transcription_path, summary_path, error, created_at, updated_at, completed_at + ) VALUES ( + :id, :mode, :status, :progress, :input_type, :source_name, :source_url, :file_path, + :format, :metadata_json, :transcription, :summary, :result_path, + 
:transcription_path, :summary_path, :error, :created_at, :updated_at, :completed_at + ) + """, + payload, + ) + conn.commit() + return self.get_job(payload["id"]) + + def update_job(self, job_id: str, **updates: Any) -> Dict[str, Any]: + if not updates: + job = self.get_job(job_id) + if job is None: + raise KeyError(f"Job nao encontrado: {job_id}") + return job + + assignments = [] + values: Dict[str, Any] = {"id": job_id} + + if "metadata" in updates: + updates["metadata_json"] = json.dumps(updates.pop("metadata") or {}, ensure_ascii=False) + + if "status" in updates and updates["status"] in TERMINAL_STATUSES and "completed_at" not in updates: + updates["completed_at"] = datetime.now().isoformat() + + updates["updated_at"] = datetime.now().isoformat() + + for key, value in updates.items(): + assignments.append(f"{key} = :{key}") + values[key] = value + + with self._lock, self._connect() as conn: + cursor = conn.execute( + f"UPDATE jobs SET {', '.join(assignments)} WHERE id = :id", + values, + ) + conn.commit() + if cursor.rowcount == 0: + raise KeyError(f"Job nao encontrado: {job_id}") + + return self.get_job(job_id) + + def get_job(self, job_id: str) -> Optional[Dict[str, Any]]: + with self._connect() as conn: + row = conn.execute("SELECT * FROM jobs WHERE id = ?", (job_id,)).fetchone() + return self._row_to_job(row) if row else None + + def list_jobs(self) -> List[Dict[str, Any]]: + with self._connect() as conn: + rows = conn.execute("SELECT * FROM jobs ORDER BY created_at DESC").fetchall() + return [self._row_to_job(row) for row in rows] + + def mark_incomplete_as_interrupted(self) -> int: + interrupted_at = datetime.now().isoformat() + with self._lock, self._connect() as conn: + cursor = conn.execute( + """ + UPDATE jobs + SET status = 'interrupted', + progress = CASE WHEN progress > 0 THEN progress ELSE 0 END, + error = COALESCE(error, 'Processamento interrompido por reinicializacao do servidor.'), + updated_at = ?, + completed_at = ? 
+ WHERE status IN ('pending', 'processing') + """, + (interrupted_at, interrupted_at), + ) + conn.commit() + return cursor.rowcount + + def _row_to_job(self, row: sqlite3.Row) -> Dict[str, Any]: + return { + "id": row["id"], + "mode": row["mode"], + "status": row["status"], + "progress": row["progress"], + "input_type": row["input_type"], + "source_name": row["source_name"], + "source_url": row["source_url"], + "file_path": row["file_path"], + "format": row["format"], + "metadata": json.loads(row["metadata_json"] or "{}"), + "transcription": row["transcription"], + "summary": row["summary"], + "result_path": row["result_path"], + "transcription_path": row["transcription_path"], + "summary_path": row["summary_path"], + "error": row["error"], + "created_at": row["created_at"], + "updated_at": row["updated_at"], + "completed_at": row["completed_at"], + } + + +_job_store: Optional[JobStore] = None +_job_store_path: Optional[Path] = None + + +def get_job_store() -> JobStore: + global _job_store, _job_store_path + db_path = get_jobs_db_path() + if _job_store is None or _job_store_path != db_path: + _job_store = JobStore(db_path) + _job_store_path = db_path + return _job_store diff --git a/lazier/core/processing.py b/lazier/core/processing.py @@ -0,0 +1,400 @@ +""" +Pipeline compartilhado de processamento para CLI e API. 
+""" + +import os +from datetime import datetime +from pathlib import Path +from typing import Any, Callable, Dict, Optional + +from .cache import calculate_file_hash, calculate_url_hash, get_cache_manager +from .exceptions import MusicContentError +from .jobs import build_job_artifact_path, get_outputs_root +from ..audio_processor import prepare_audio_file +from ..downloader import download_video_audio, download_youtube_audio +from ..summarizer import render_text_in_portuguese, summarize_text +from ..transcriber import transcribe_audio +from ..utils import cleanup_files, validate_input +from ..web.extractor import extract_pdf_content, extract_text_file_content, extract_web_content +from .formats import export + +ProgressCallback = Optional[Callable[[int, str, Optional[str]], None]] + + +def _notify(callback: ProgressCallback, progress: int, status: str, message: Optional[str] = None) -> None: + if callback: + callback(progress, status, message) + + +def _get_cache_manager_safe(): + try: + return get_cache_manager() + except Exception: + return None + + +def _ensure_api_key() -> None: + if not os.getenv("OPENAI_API_KEY"): + raise Exception( + "OPENAI_API_KEY nao encontrada. Configure a variavel de ambiente OPENAI_API_KEY " + "ou crie um arquivo .env" + ) + + +def _ensure_mode(mode: str) -> str: + if mode not in {"transcribe", "summarize"}: + raise ValueError("Modo invalido. 
Use 'transcribe' ou 'summarize'.") + return mode + + +def _export_selected_artifact( + mode: str, + format_type: str, + output_path: str, + transcription: str, + summary: Optional[str], + metadata: Dict[str, Any], +) -> str: + if mode == "transcribe": + return export( + transcription=transcription, + summary=None, + metadata=metadata, + output_path=output_path, + format_type=format_type, + ) + return export( + transcription="", + summary=summary, + metadata=metadata, + output_path=output_path, + format_type=format_type, + ) + + +def _transcribe_media_to_portuguese( + audio_file: str, + cache_prefix: str, + cache_identifier: str, + model: str, + gpt_model: str, + metadata: Dict[str, Any], + progress_callback: ProgressCallback, + progress_start: int = 30, + progress_end: int = 70, +) -> str: + cache = _get_cache_manager_safe() + cached = cache.get(cache_prefix, cache_identifier) if cache else None + if cached and cached.get("transcription"): + _notify(progress_callback, progress_end, "processing", "Transcricao encontrada no cache") + return cached["transcription"] + + midpoint = progress_start + ((progress_end - progress_start) // 2) + _notify(progress_callback, progress_start, "processing", "Transcrevendo audio...") + raw_transcription = transcribe_audio(audio_file, language=None, model=model) + _notify(progress_callback, midpoint, "processing", "Convertendo conteudo para portugues...") + portuguese_text = render_text_in_portuguese(raw_transcription, model=gpt_model) + + if cache: + cache.set( + cache_prefix, + cache_identifier, + { + "transcription": portuguese_text, + "metadata": metadata, + "timestamp": datetime.now().isoformat(), + }, + ) + + _notify(progress_callback, progress_end, "processing", "Transcricao concluida") + return portuguese_text + + +def _summarize_portuguese_text( + text: str, + model: str, + progress_callback: ProgressCallback, + progress_start: int = 75, + progress_end: int = 88, +) -> str: + cache = _get_cache_manager_safe() + text_hash = 
calculate_url_hash(text) + cached = cache.get("summary", text_hash) if cache else None + if cached and cached.get("summary"): + _notify(progress_callback, progress_end, "processing", "Sumario encontrado no cache") + return cached["summary"] + + _notify(progress_callback, progress_start, "processing", "Gerando sumario em portugues...") + summary = summarize_text(text, model=model, language="pt-BR") + if cache: + cache.set( + "summary", + text_hash, + { + "summary": summary, + "timestamp": datetime.now().isoformat(), + }, + ) + _notify(progress_callback, progress_end, "processing", "Sumario concluido") + return summary + + +def process_source( + source: str, + *, + mode: str, + output_format: str = "docx", + model: str = "whisper-1", + gpt_model: str = "gpt-4o-mini", + output_path: Optional[str] = None, + output_root: Optional[Path] = None, + run_id: Optional[str] = None, + source_name: Optional[str] = None, + created_at: Optional[str] = None, + progress_callback: ProgressCallback = None, + keep_files: bool = False, +) -> Dict[str, Any]: + """ + Processa um input e retorna texto em portugues, sumario e caminhos de saida. 
+ """ + + _ensure_api_key() + mode = _ensure_mode(mode) + _notify(progress_callback, 5, "processing", "Validando entrada...") + is_valid, input_type, error_msg = validate_input(source) + if not is_valid: + raise Exception(error_msg) + + cache = _get_cache_manager_safe() + files_to_cleanup = [] + metadata: Dict[str, Any] = {} + portuguese_text: Optional[str] = None + summary: Optional[str] = None + + try: + output_root = output_root or get_outputs_root() + + if input_type == "youtube": + url_hash = calculate_url_hash(source) if cache else "" + cached = cache.get("youtube", url_hash) if cache else None + if cached and cached.get("transcription"): + metadata = cached.get("metadata", {}) + portuguese_text = cached.get("transcription") + summary = cached.get("summary") + _notify(progress_callback, 70, "processing", "Conteudo do YouTube encontrado no cache") + if mode == "summarize" and not summary: + summary = _summarize_portuguese_text(portuguese_text, gpt_model, progress_callback) + if cache: + cache.set( + "youtube", + url_hash, + { + "transcription": portuguese_text, + "summary": summary, + "metadata": metadata, + "timestamp": datetime.now().isoformat(), + }, + ) + else: + _notify(progress_callback, 15, "processing", "Baixando video do YouTube...") + audio_file, metadata = download_youtube_audio(source) + files_to_cleanup.append(audio_file) + portuguese_text = _transcribe_media_to_portuguese( + audio_file=audio_file, + cache_prefix="transcription", + cache_identifier=calculate_file_hash(audio_file), + model=model, + gpt_model=gpt_model, + metadata=metadata, + progress_callback=progress_callback, + ) + if mode == "summarize": + summary = _summarize_portuguese_text(portuguese_text, gpt_model, progress_callback) + if cache: + cache.set( + "youtube", + url_hash, + { + "transcription": portuguese_text, + "summary": summary, + "metadata": metadata, + "timestamp": datetime.now().isoformat(), + }, + ) + + elif input_type == "web": + url_hash = calculate_url_hash(source) if 
cache else "" + try: + _notify(progress_callback, 15, "processing", "Tentando extrair audio da URL...") + audio_file, metadata = download_video_audio(source) + files_to_cleanup.append(audio_file) + cached = cache.get("video", url_hash) if cache else None + if cached and cached.get("transcription"): + metadata = cached.get("metadata", metadata) + portuguese_text = cached.get("transcription") + summary = cached.get("summary") + _notify(progress_callback, 70, "processing", "Conteudo de video encontrado no cache") + if mode == "summarize" and not summary: + summary = _summarize_portuguese_text(portuguese_text, gpt_model, progress_callback) + if cache: + cache.set( + "video", + url_hash, + { + "transcription": portuguese_text, + "summary": summary, + "metadata": metadata, + "timestamp": datetime.now().isoformat(), + }, + ) + else: + portuguese_text = _transcribe_media_to_portuguese( + audio_file=audio_file, + cache_prefix="transcription", + cache_identifier=calculate_file_hash(audio_file), + model=model, + gpt_model=gpt_model, + metadata=metadata, + progress_callback=progress_callback, + ) + if mode == "summarize": + summary = _summarize_portuguese_text(portuguese_text, gpt_model, progress_callback) + if cache: + cache.set( + "video", + url_hash, + { + "transcription": portuguese_text, + "summary": summary, + "metadata": metadata, + "timestamp": datetime.now().isoformat(), + }, + ) + except MusicContentError: + raise + except Exception: + cached = cache.get("web", url_hash) if cache else None + if cached and cached.get("content"): + metadata = cached.get("metadata", {}) + portuguese_text = cached.get("content") + summary = cached.get("summary") + _notify(progress_callback, 70, "processing", "Conteudo web encontrado no cache") + if mode == "summarize" and not summary: + summary = _summarize_portuguese_text(portuguese_text, gpt_model, progress_callback) + if cache: + cache.set( + "web", + url_hash, + { + "content": portuguese_text, + "summary": summary, + "metadata": 
metadata, + "timestamp": datetime.now().isoformat(), + }, + ) + else: + _notify(progress_callback, 20, "processing", "Extraindo texto da pagina web...") + content_data = extract_web_content(source) + metadata = { + "title": content_data.get("title", "Pagina Web"), + "webpage_url": source, + } + portuguese_text = render_text_in_portuguese(content_data["content"], model=gpt_model) + _notify(progress_callback, 70, "processing", "Texto convertido para portugues") + if mode == "summarize": + summary = _summarize_portuguese_text(portuguese_text, gpt_model, progress_callback) + if cache: + cache.set( + "web", + url_hash, + { + "content": portuguese_text, + "summary": summary, + "metadata": metadata, + "timestamp": datetime.now().isoformat(), + }, + ) + + elif input_type in {"audio", "video"}: + _notify(progress_callback, 15, "processing", "Preparando audio...") + audio_file = prepare_audio_file(source, is_video=(input_type == "video")) + if audio_file != source: + files_to_cleanup.append(audio_file) + portuguese_text = _transcribe_media_to_portuguese( + audio_file=audio_file, + cache_prefix="transcription", + cache_identifier=calculate_file_hash(audio_file), + model=model, + gpt_model=gpt_model, + metadata=metadata, + progress_callback=progress_callback, + ) + if mode == "summarize": + summary = _summarize_portuguese_text(portuguese_text, gpt_model, progress_callback) + + elif input_type in {"pdf", "text"}: + _notify(progress_callback, 20, "processing", "Extraindo conteudo do arquivo...") + if input_type == "pdf": + content_data = extract_pdf_content(source) + else: + content_data = extract_text_file_content(source) + metadata = { + "title": content_data.get("title", "Documento"), + "file_path": source, + } + portuguese_text = render_text_in_portuguese(content_data["content"], model=gpt_model) + _notify(progress_callback, 70, "processing", "Conteudo convertido para portugues") + if mode == "summarize": + summary = _summarize_portuguese_text(portuguese_text, gpt_model, 
progress_callback) + + if metadata is None: + metadata = {} + if source.startswith(("http://", "https://")) and not metadata.get("webpage_url"): + metadata = {**metadata, "webpage_url": source} + + run_id = run_id or datetime.now().strftime("%Y%m%d%H%M%S%f") + resolved_source_name = source_name or metadata.get("title") or source + + if output_path: + final_output_path = Path(output_path) + if final_output_path.suffix != f".{output_format}": + final_output_path = final_output_path.with_suffix(f".{output_format}") + else: + artifact_kind = "transcription" if mode == "transcribe" else "summary" + final_output_path = build_job_artifact_path( + job_id=run_id, + source_name=resolved_source_name, + format_type=output_format, + artifact_kind=artifact_kind, + created_at=created_at, + output_root=output_root, + ) + + _notify(progress_callback, 92, "processing", f"Gerando arquivo {output_format.upper()}...") + exported_path = _export_selected_artifact( + mode=mode, + format_type=output_format, + output_path=str(final_output_path), + transcription=portuguese_text or "", + summary=summary, + metadata=metadata, + ) + + output_dir = str(Path(exported_path).parent) + result = { + "mode": mode, + "input_type": input_type, + "source_name": resolved_source_name, + "metadata": metadata, + "transcription": portuguese_text, + "summary": summary, + "result_path": exported_path, + "transcription_path": exported_path if mode == "transcribe" else None, + "summary_path": exported_path if mode == "summarize" else None, + "output_dir": output_dir, + } + _notify(progress_callback, 100, "completed", "Processamento concluido") + return result + finally: + if not keep_files: + cleanup_files(files_to_cleanup) diff --git a/lazier/docx_generator.py b/lazier/docx_generator.py @@ -5,9 +5,13 @@ Módulo para geração de arquivos DOCX com transcrição e sumário import re from datetime import datetime from pathlib import Path -from docx import Document -from docx.shared import Pt, Inches, RGBColor -from 
docx.enum.text import WD_ALIGN_PARAGRAPH +try: + from docx import Document + from docx.shared import Pt, Inches, RGBColor + from docx.enum.text import WD_ALIGN_PARAGRAPH +except ImportError: # pragma: no cover - ambiente sem python-docx + Document = None + Pt = Inches = RGBColor = WD_ALIGN_PARAGRAPH = None from typing import Optional, Dict, Any from .utils import sanitize_xml_string @@ -31,6 +35,8 @@ def create_document( Returns: Caminho do arquivo criado """ + if Document is None: + raise Exception("python-docx não está instalado neste ambiente.") doc = Document() # Configuração de estilo padrão @@ -90,20 +96,19 @@ def create_document( doc.add_paragraph() # Seção de Transcrição - doc.add_heading('Transcrição Completa', level=2) - - # Sanitizar transcrição antes de processar transcription = sanitize_xml_string(transcription) - - # Divide transcrição em parágrafos - transcription_paragraphs = transcription.split('\n\n') - if len(transcription_paragraphs) == 1: - # Se não tem \n\n, tenta dividir por \n - transcription_paragraphs = transcription.split('\n') - - for para_text in transcription_paragraphs: - if para_text.strip(): - _add_markdown_paragraph(doc, para_text.strip(), is_summary=False) + if transcription.strip(): + doc.add_heading('Transcrição Completa', level=2) + + # Divide transcrição em parágrafos + transcription_paragraphs = transcription.split('\n\n') + if len(transcription_paragraphs) == 1: + # Se não tem \n\n, tenta dividir por \n + transcription_paragraphs = transcription.split('\n') + + for para_text in transcription_paragraphs: + if para_text.strip(): + _add_markdown_paragraph(doc, para_text.strip(), is_summary=False) # Rodapé doc.add_paragraph() diff --git a/lazier/downloader.py b/lazier/downloader.py @@ -10,7 +10,11 @@ import re from pathlib import Path from typing import Optional, Dict, Any, Tuple, Type from urllib.parse import urlparse -import yt_dlp + +try: + import yt_dlp +except ImportError: # pragma: no cover - ambiente sem yt-dlp + yt_dlp = 
None from .core.exceptions import ( YouTubeDownloadError, @@ -21,6 +25,11 @@ from .core.exceptions import ( logger = logging.getLogger(__name__) + +def _require_yt_dlp() -> None: + if yt_dlp is None: + raise Exception("yt-dlp não está instalado neste ambiente.") + # Domínios exclusivamente de música: não processar em hipótese alguma MUSIC_ONLY_DOMAINS = frozenset({ 'spotify.com', 'open.spotify.com', 'music.apple.com', 'itunes.apple.com', @@ -244,6 +253,7 @@ def download_youtube_audio(url: str, output_dir: Optional[str] = None) -> tuple[ MusicContentError: Se o conteúdo for detectado como música Exception: Se o download falhar após todas as tentativas """ + _require_yt_dlp() if is_music_domain(url): raise MusicContentError( "Conteúdo detectado como música não é processado pelo Lazier." @@ -462,6 +472,7 @@ def download_video_audio(url: str, output_dir: Optional[str] = None) -> tuple[st MusicContentError: Se o conteúdo for detectado como música Exception: Se a URL não for vídeo ou o download falhar """ + _require_yt_dlp() if is_music_domain(url): raise MusicContentError( "Conteúdo detectado como música não é processado pelo Lazier." diff --git a/lazier/summarizer.py b/lazier/summarizer.py @@ -5,8 +5,15 @@ Suporta textos, páginas web e PDFs import os from typing import Optional -from openai import OpenAI -from dotenv import load_dotenv +try: + from openai import OpenAI +except ImportError: # pragma: no cover - ambiente sem openai + OpenAI = None +try: + from dotenv import load_dotenv +except ImportError: # pragma: no cover - ambiente sem python-dotenv + def load_dotenv(): + return False from .web.extractor import extract_web_content, extract_pdf_content, extract_text_file_content @@ -31,7 +38,9 @@ def summarize_text(text: str, model: str = 'gpt-4o-mini', language: str = 'pt-BR "OPENAI_API_KEY não encontrada. 
" "Configure a variável de ambiente OPENAI_API_KEY ou crie um arquivo .env" ) - + if OpenAI is None: + raise Exception("openai não está instalado neste ambiente.") + if not text or not text.strip(): return "Texto vazio - não é possível gerar sumário." @@ -73,6 +82,71 @@ def summarize_text(text: str, model: str = 'gpt-4o-mini', language: str = 'pt-BR return valid_summaries[0] +def render_text_in_portuguese(text: str, model: str = 'gpt-4o-mini') -> str: + """ + Converte qualquer texto para portugues do Brasil preservando detalhes. + Se o texto ja estiver em portugues, apenas normaliza a redacao. + """ + api_key = os.getenv('OPENAI_API_KEY') + if not api_key: + raise Exception( + "OPENAI_API_KEY não encontrada. " + "Configure a variável de ambiente OPENAI_API_KEY ou crie um arquivo .env" + ) + if OpenAI is None: + raise Exception("openai não está instalado neste ambiente.") + + if not text or not text.strip(): + return "" + + max_chars = 300000 + if len(text) <= max_chars: + return _render_portuguese_chunk(text, model) + + chunks = _split_text_into_chunks(text, max_chars) + rendered_chunks = [] + print(f"Texto longo detectado ({len(text)} caracteres). Convertendo {len(chunks)} partes para portugues...") + for i, chunk in enumerate(chunks): + print(f"Convertendo parte {i+1}/{len(chunks)}...") + rendered_chunks.append(_render_portuguese_chunk(chunk, model)) + return "\n\n".join(chunk.strip() for chunk in rendered_chunks if chunk.strip()) + + +def _render_portuguese_chunk(text: str, model: str) -> str: + api_key = os.getenv('OPENAI_API_KEY') + client = OpenAI(api_key=api_key) + prompt = """Converta o texto a seguir para portugues do Brasil. + +Regras: +- Se o texto ja estiver em portugues, mantenha em portugues do Brasil natural. +- Nao resuma, nao explique, nao comente o texto. +- Preserve nomes proprios, numeros, datas, links, listas e estrutura. +- Mantenha o maximo de fidelidade possivel ao conteudo original. 
+ +Texto: + +""" + + try: + response = client.chat.completions.create( + model=model, + messages=[ + { + "role": "system", + "content": "Voce e um tradutor tecnico e editorial que preserva fielmente o conteudo." + }, + {"role": "user", "content": prompt + text} + ], + temperature=0.1, + ) + return response.choices[0].message.content.strip() + except Exception as e: + error_msg = str(e) + if 'api_key' in error_msg.lower() or 'authentication' in error_msg.lower(): + raise Exception("Erro de autenticação com OpenAI API. Verifique sua OPENAI_API_KEY.") + raise Exception(f"Erro ao converter texto para português: {error_msg}") + + def _summarize_chunk(text: str, model: str, language: str, is_final: bool = False) -> str: """Sumariza um chunk de texto""" api_key = os.getenv('OPENAI_API_KEY') diff --git a/lazier/transcriber.py b/lazier/transcriber.py @@ -5,8 +5,15 @@ Módulo para transcrição de áudio usando OpenAI Whisper API import os from pathlib import Path from typing import Optional -from openai import OpenAI -from dotenv import load_dotenv +try: + from openai import OpenAI +except ImportError: # pragma: no cover - ambiente sem openai + OpenAI = None +try: + from dotenv import load_dotenv +except ImportError: # pragma: no cover - ambiente sem python-dotenv + def load_dotenv(): + return False load_dotenv() @@ -14,14 +21,14 @@ load_dotenv() from .audio_processor import split_audio -def transcribe_audio(audio_path: str, language: str = 'pt', model: str = 'whisper-1') -> str: +def transcribe_audio(audio_path: str, language: Optional[str] = None, model: str = 'whisper-1') -> str: """ Transcreve um arquivo de áudio usando OpenAI Whisper API, com suporte a divisão automática de arquivos grandes. Args: audio_path: Caminho do arquivo de áudio - language: Código do idioma (pt para português) + language: Codigo do idioma. Quando None, a API detecta automaticamente. 
model: Modelo Whisper a usar (padrão: whisper-1) Returns: @@ -33,7 +40,9 @@ def transcribe_audio(audio_path: str, language: str = 'pt', model: str = 'whispe "OPENAI_API_KEY não encontrada. " "Configure a variável de ambiente OPENAI_API_KEY ou crie um arquivo .env" ) - + if OpenAI is None: + raise Exception("openai não está instalado neste ambiente.") + if not os.path.exists(audio_path): raise FileNotFoundError(f"Arquivo de áudio não encontrado: {audio_path}") @@ -52,12 +61,14 @@ def transcribe_audio(audio_path: str, language: str = 'pt', model: str = 'whispe for i, chunk_path in enumerate(chunks): print(f"Processando chunk {i+1}/{len(chunks)}...") with open(chunk_path, 'rb') as audio_file: - transcript = client.audio.transcriptions.create( - model=model, - file=audio_file, - language=language, - response_format='text' - ) + request_kwargs = { + 'model': model, + 'file': audio_file, + 'response_format': 'text' + } + if language: + request_kwargs['language'] = language + transcript = client.audio.transcriptions.create(**request_kwargs) if hasattr(transcript, 'text'): text = transcript.text @@ -70,12 +81,14 @@ def transcribe_audio(audio_path: str, language: str = 'pt', model: str = 'whispe else: # Caso contrário, transcreve direto with open(audio_path, 'rb') as audio_file: - transcript = client.audio.transcriptions.create( - model=model, - file=audio_file, - language=language, - response_format='text' - ) + request_kwargs = { + 'model': model, + 'file': audio_file, + 'response_format': 'text' + } + if language: + request_kwargs['language'] = language + transcript = client.audio.transcriptions.create(**request_kwargs) # Se retornou como objeto, pega o texto if hasattr(transcript, 'text'): diff --git a/lazier/web/extractor.py b/lazier/web/extractor.py @@ -7,16 +7,33 @@ import re import random from pathlib import Path from typing import Optional, Dict, Any -import requests -from bs4 import BeautifulSoup -import pypdf -import pdfplumber +try: + import requests +except 
ImportError: # pragma: no cover - ambiente sem requests + requests = None + +try: + from bs4 import BeautifulSoup +except ImportError: # pragma: no cover - ambiente sem beautifulsoup4 + BeautifulSoup = None + +try: + import pypdf +except ImportError: # pragma: no cover - ambiente sem pypdf + pypdf = None + +try: + import pdfplumber +except ImportError: # pragma: no cover - ambiente sem pdfplumber + pdfplumber = None from ..utils import sanitize_xml_string def _extract_with_bs4(url: str, timeout: int) -> Dict[str, Any]: """Extrai conteúdo usando BeautifulSoup (método rápido, sem JavaScript)""" + if requests is None or BeautifulSoup is None: + raise Exception("requests e beautifulsoup4 são necessários para extrair conteúdo web.") # User-Agents rotativos para evitar bloqueios user_agents = [ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', @@ -259,15 +276,14 @@ def extract_web_content(url: str, timeout: int = 30, use_js: bool = True) -> Dic return content - except requests.exceptions.RequestException as e: - # Se requests falhar, tentar com Playwright se use_js estiver habilitado - if use_js: - try: - return _extract_with_playwright(url, timeout) - except Exception: - pass - raise Exception(f"Erro ao acessar URL: {str(e)}") except Exception as e: + if requests is not None and isinstance(e, requests.exceptions.RequestException): + if use_js: + try: + return _extract_with_playwright(url, timeout) + except Exception: + pass + raise Exception(f"Erro ao acessar URL: {str(e)}") raise Exception(f"Erro ao extrair conteúdo web: {str(e)}") @@ -281,6 +297,8 @@ def extract_pdf_content(file_path: str) -> Dict[str, Any]: Returns: Dicionário com conteúdo extraído e metadados """ + if pdfplumber is None and pypdf is None: + raise Exception("pdfplumber ou pypdf são necessários para processar PDFs.") import warnings import logging diff --git a/lazier/web/templates/index.html b/lazier/web/templates/index.html @@ -3,1670 
+3,385 @@ <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> - <title>Lazier - Transcrição e Sumarização</title> - <link rel="icon" href="data:image/svg+xml,<svg xmlns=%22http://www.w3.org/2000/svg%22 viewBox=%220 0 100 100%22><text y=%22.9em%22 font-size=%2290%22>📢</text></svg>"> + <title>Lazier</title> <link rel="preconnect" href="https://fonts.googleapis.com"> <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin> - <link href="https://fonts.googleapis.com/css2?family=Playfair+Display:wght@400;600;700&family=Work+Sans:wght@300;400;500;600;700&display=swap" rel="stylesheet"> + <link href="https://fonts.googleapis.com/css2?family=Fraunces:wght@500;700&family=Manrope:wght@400;500;600;700&display=swap" rel="stylesheet"> <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script> <style> - /* ===== VARIÁVEIS CSS ===== */ - :root { - --color-primary: #283593; - --color-primary-dark: #1a237e; - --color-primary-light: #3949ab; - --color-secondary: #ffb300; - --color-accent: #ffa000; - --color-accent-light: #ffc107; - --color-bg: #fafafa; - --color-surface: #ffffff; - --color-text: #263238; - --color-text-light: #546e7a; - --color-text-lighter: #78909c; - --color-border: #e0e0e0; - --color-border-light: #f5f5f5; - --font-display: 'Playfair Display', serif; - --font-body: 'Work Sans', sans-serif; - --spacing-xs: 2px; - --spacing-sm: 4px; - --spacing-md: 8px; - --spacing-lg: 12px; - --spacing-xl: 16px; - --spacing-2xl: 24px; - --spacing-3xl: 32px; - --border-radius-sm: 8px; - --border-radius-md: 12px; - --border-radius-lg: 16px; - --border-radius-xl: 24px; - --shadow-sm: 0 2px 4px rgba(0,0,0,0.08); - --shadow-md: 0 4px 12px rgba(0,0,0,0.12); - --shadow-lg: 0 8px 24px rgba(0,0,0,0.16); - --shadow-xl: 0 12px 40px rgba(0,0,0,0.2); - --transition-fast: 0.15s ease; - --transition-base: 0.3s ease; - --transition-slow: 0.5s ease; - } - - /* ===== RESET E BASE ===== */ - * { - margin: 0; - padding: 0; - 
box-sizing: border-box; - /* Firefox scrollbar */ - scrollbar-width: thin; - scrollbar-color: var(--color-secondary) rgba(255, 255, 255, 0.1); - } - - html { - scroll-behavior: smooth; - } - - /* ===== SCROLLBARS CUSTOMIZADAS ===== */ - /* Webkit (Chrome, Safari, Edge) */ - ::-webkit-scrollbar { - width: 12px; - height: 12px; - } - - ::-webkit-scrollbar-track { - background: rgba(255, 255, 255, 0.1); - border-radius: 10px; - margin: 4px; - } - - ::-webkit-scrollbar-thumb { - background: linear-gradient(135deg, var(--color-primary) 0%, var(--color-secondary) 100%); - border-radius: 10px; - border: 2px solid transparent; - background-clip: padding-box; - transition: all var(--transition-base); - } - - ::-webkit-scrollbar-thumb:hover { - background: linear-gradient(135deg, var(--color-primary-light) 0%, var(--color-accent) 100%); - transform: scaleY(1.1); - } - - ::-webkit-scrollbar-corner { - background: rgba(255, 255, 255, 0.1); - border-radius: 10px; - } - - body { - font-family: var(--font-body); - background: linear-gradient(135deg, #1a237e 0%, #283593 50%, #3949ab 100%); - background-attachment: fixed; - min-height: 100vh; - color: var(--color-text); - line-height: 1.5; - padding-top: 60px; - padding-bottom: 0; - display: flex; - flex-direction: column; - position: relative; - overflow-x: hidden; - } - - /* Background texture overlay */ - body::before { - content: ''; - position: fixed; - top: 0; - left: 0; - width: 100%; - height: 100%; - background-image: - radial-gradient(circle at 20% 50%, rgba(255, 179, 0, 0.1) 0%, transparent 50%), - radial-gradient(circle at 80% 80%, rgba(255, 160, 0, 0.1) 0%, transparent 50%); - pointer-events: none; - z-index: 0; - } - - /* ===== NAVBAR ===== */ - .navbar { - position: fixed; - top: 0; - left: 0; - right: 0; - height: 60px; - background: rgba(26, 35, 126, 0.85); - backdrop-filter: blur(20px); - -webkit-backdrop-filter: blur(20px); - padding: var(--spacing-sm) var(--spacing-md); - box-shadow: var(--shadow-sm); - z-index: 
1000; - display: flex; - justify-content: space-between; - align-items: center; - border-bottom: 1px solid rgba(255, 255, 255, 0.1); - transition: var(--transition-base); - } - - .navbar-brand { - font-family: var(--font-display); - color: #ffffff; - font-size: clamp(1.125rem, 3vw, 1.5rem); - font-weight: 700; - text-decoration: none; - letter-spacing: -0.5px; - transition: var(--transition-base); - display: flex; - align-items: center; - gap: var(--spacing-xs); - } - - .navbar-brand:hover { - transform: translateY(-2px); - text-shadow: 0 4px 12px rgba(255, 179, 0, 0.3); - } - - .navbar-nav { - display: flex; - gap: var(--spacing-md); - list-style: none; - } - - .navbar-nav a { - color: rgba(255, 255, 255, 0.9); - text-decoration: none; - font-weight: 500; - font-size: 0.9rem; - padding: var(--spacing-xs) 0; - position: relative; - transition: var(--transition-base); - } - - .navbar-nav a::after { - content: ''; - position: absolute; - bottom: 0; - left: 0; - width: 0; - height: 2px; - background: var(--color-secondary); - transition: var(--transition-base); - } - - .navbar-nav a:hover, - .navbar-nav a.active { - color: #ffffff; - } - - .navbar-nav a:hover::after, - .navbar-nav a.active::after { - width: 100%; - } - - .mobile-menu-toggle { - display: none; - background: none; - border: none; - color: #ffffff; - font-size: 1.5rem; - cursor: pointer; - padding: var(--spacing-sm); - } - - /* ===== CONTAINER ===== */ - .container { - flex: 1; - max-width: 1200px; - margin: 0 auto; - padding: var(--spacing-md) var(--spacing-sm); - width: 100%; - position: relative; - z-index: 1; - } - - .page { - display: none; - background: var(--color-surface); - border-radius: var(--border-radius-md); - box-shadow: var(--shadow-sm); - padding: var(--spacing-md) var(--spacing-sm); - height: calc(100vh - 120px); - animation: fadeIn 0.5s ease; - position: relative; - overflow: hidden; - } - - .page::before { - content: ''; - position: absolute; - top: 0; - left: 0; - right: 0; - height: 
2px; - background: linear-gradient(90deg, var(--color-primary), var(--color-secondary), var(--color-accent)); - } - - .page.active { - display: flex; - flex-direction: column; - overflow: hidden; - } - - .page-content { - flex: 1; - overflow-y: auto; - overflow-x: hidden; - padding: var(--spacing-sm); - display: flex; - flex-direction: column; - } - - .page-content::-webkit-scrollbar { - width: 10px; - } - - .page-content::-webkit-scrollbar-thumb { - background: linear-gradient(135deg, rgba(40, 53, 147, 0.3) 0%, rgba(255, 179, 0, 0.3) 100%); - } - - @keyframes fadeIn { - from { - opacity: 0; - transform: translateY(20px); - } - to { - opacity: 1; - transform: translateY(0); - } - } - - /* ===== TIPOGRAFIA ===== */ - body { - font-size: 0.875rem; - line-height: 1.5; - } - - h1 { - font-family: var(--font-display); - color: var(--color-primary); - margin-bottom: var(--spacing-sm); - font-size: clamp(1.5rem, 3vw, 2rem); - font-weight: 700; - line-height: 1.2; - letter-spacing: -1px; - } - - h2 { - font-family: var(--font-display); - color: var(--color-primary); - font-size: clamp(1.25rem, 2.5vw, 1.75rem); - font-weight: 600; - margin-bottom: var(--spacing-sm); - } - - h3 { - font-family: var(--font-display); - color: var(--color-text); - font-size: clamp(1rem, 2vw, 1.25rem); - font-weight: 600; - margin-bottom: var(--spacing-sm); - } - - .subtitle { - color: var(--color-text-light); - margin-bottom: var(--spacing-lg); - font-size: clamp(0.75rem, 1.5vw, 0.9rem); - font-weight: 400; - } - - /* ===== UPLOAD AREA ===== */ - .upload-area { - border: 2px dashed var(--color-border); - border-radius: var(--border-radius-md); - padding: var(--spacing-lg) var(--spacing-md); - text-align: center; - background: linear-gradient(135deg, rgba(255, 179, 0, 0.03) 0%, rgba(255, 160, 0, 0.05) 100%); - transition: all var(--transition-base); - cursor: pointer; - margin-bottom: var(--spacing-md); - position: relative; - overflow: hidden; - } - - .upload-area::before { - content: ''; - 
position: absolute; - top: -50%; - left: -50%; - width: 200%; - height: 200%; - background: linear-gradient(45deg, transparent, rgba(255, 179, 0, 0.1), transparent); - transform: rotate(45deg); - transition: var(--transition-slow); - opacity: 0; - } - - .upload-area:hover::before { - animation: shimmer 2s infinite; - opacity: 1; - } - - @keyframes shimmer { - 0% { transform: translateX(-100%) translateY(-100%) rotate(45deg); } - 100% { transform: translateX(100%) translateY(100%) rotate(45deg); } - } - - .upload-area:hover { - border-color: var(--color-secondary); - background: linear-gradient(135deg, rgba(255, 179, 0, 0.08) 0%, rgba(255, 160, 0, 0.12) 100%); - transform: translateY(-2px); - box-shadow: var(--shadow-md); - } - - .upload-area.dragover { - border-color: var(--color-secondary); - background: linear-gradient(135deg, rgba(255, 179, 0, 0.15) 0%, rgba(255, 160, 0, 0.2) 100%); - transform: scale(1.01); - box-shadow: var(--shadow-md); - } - - .upload-icon { - font-size: clamp(2rem, 5vw, 3rem); - margin-bottom: var(--spacing-md); - display: block; - animation: float 3s ease-in-out infinite; - } - - @keyframes float { - 0%, 100% { transform: translateY(0); } - 50% { transform: translateY(-10px); } - } - - .upload-area h3 { - font-size: clamp(0.9rem, 2.5vw, 1.125rem); - color: var(--color-text); - margin-bottom: var(--spacing-xs); - } - - .upload-area p { - color: var(--color-text-light); - font-size: clamp(0.75rem, 1.5vw, 0.875rem); - } - - .file-input { - display: none; - } - - /* ===== URL INPUT ===== */ - .url-input { - width: 100%; - padding: var(--spacing-sm) var(--spacing-md); - border: 1px solid var(--color-border); - border-radius: var(--border-radius-sm); - font-size: 0.875rem; - font-family: var(--font-body); - margin-bottom: var(--spacing-md); - transition: all var(--transition-base); - background: var(--color-surface); - color: var(--color-text); - } - - .url-input:focus { - outline: none; - border-color: var(--color-secondary); - box-shadow: 0 0 
0 3px rgba(255, 179, 0, 0.1); - } - - /* ===== PROCESSING OPTIONS ===== */ - .processing-options { - margin: var(--spacing-lg) 0; - padding: var(--spacing-md); - background: linear-gradient(135deg, rgba(40, 53, 147, 0.03) 0%, rgba(26, 35, 126, 0.05) 100%); - border-radius: var(--border-radius-md); - border: 1px solid var(--color-border-light); - } - - .processing-options h3 { - margin-bottom: var(--spacing-md); - color: var(--color-primary); - } - - .option-cards { - display: grid; - grid-template-columns: repeat(auto-fit, minmax(150px, 1fr)); - gap: var(--spacing-md); - margin-top: var(--spacing-md); - } - - .option-card { - padding: var(--spacing-md) var(--spacing-sm); - border: 1px solid var(--color-border); - border-radius: var(--border-radius-sm); - cursor: pointer; - transition: all var(--transition-base); - text-align: center; - background: var(--color-surface); - position: relative; - overflow: hidden; - } - - .option-card::before { - content: ''; - position: absolute; - top: 0; - left: 0; - right: 0; - height: 2px; - background: linear-gradient(90deg, var(--color-primary), var(--color-secondary)); - transform: scaleX(0); - transition: var(--transition-base); - } - - .option-card:hover { - border-color: var(--color-secondary); - transform: translateY(-2px); - box-shadow: var(--shadow-sm); - } - - .option-card:hover::before { - transform: scaleX(1); - } - - .option-card.selected { - border-color: var(--color-secondary); - background: linear-gradient(135deg, rgba(255, 179, 0, 0.08) 0%, rgba(255, 160, 0, 0.12) 100%); - box-shadow: var(--shadow-sm); - } - - .option-card.selected::before { - transform: scaleX(1); - } - - .option-card input[type="radio"] { - margin-right: var(--spacing-sm); - accent-color: var(--color-secondary); - } - - .option-card label { - cursor: pointer; - font-weight: 600; - color: var(--color-text); - display: flex; - align-items: center; - justify-content: center; - font-size: 0.9rem; - } - - .option-card .description { - margin-top: 
var(--spacing-xs); - font-size: 0.75rem; - color: var(--color-text-light); - font-weight: 400; - } - - /* ===== OPTIONS GRID ===== */ - .options { - margin-top: var(--spacing-md); - display: grid; - grid-template-columns: repeat(auto-fit, minmax(150px, 1fr)); - gap: var(--spacing-md); - } - - .option-group { - display: flex; - flex-direction: column; - } - - .option-group label { - margin-bottom: var(--spacing-xs); - font-weight: 600; - color: var(--color-text); - font-size: 0.8rem; - } - - .option-group select { - padding: var(--spacing-sm) var(--spacing-md); - border: 1px solid var(--color-border); - border-radius: var(--border-radius-sm); - font-size: 0.875rem; - font-family: var(--font-body); - background: var(--color-surface); - color: var(--color-text); - cursor: pointer; - transition: all var(--transition-base); - } - - .option-group select:focus { - outline: none; - border-color: var(--color-secondary); - box-shadow: 0 0 0 3px rgba(255, 179, 0, 0.1); - } - - /* ===== BUTTONS ===== */ - .btn { - background: linear-gradient(135deg, var(--color-primary) 0%, var(--color-primary-light) 100%); - color: white; - border: none; - padding: var(--spacing-sm) var(--spacing-lg); - border-radius: var(--border-radius-sm); - font-size: 0.9rem; - font-weight: 600; - font-family: var(--font-body); - cursor: pointer; - margin-top: var(--spacing-md); - width: 100%; - transition: all var(--transition-base); - position: relative; - overflow: hidden; - box-shadow: var(--shadow-sm); - } - - .btn::before { - content: ''; - position: absolute; - top: 50%; - left: 50%; - width: 0; - height: 0; - border-radius: 50%; - background: rgba(255, 255, 255, 0.3); - transform: translate(-50%, -50%); - transition: width 0.6s, height 0.6s; - } - - .btn:hover::before { - width: 300px; - height: 300px; - } - - .btn:hover { - transform: translateY(-2px); - box-shadow: var(--shadow-md); - } - - .btn:active { - transform: translateY(0); - box-shadow: var(--shadow-sm); - } - - .btn:disabled { - 
opacity: 0.6; - cursor: not-allowed; - transform: none; - } - - .btn-secondary { - background: linear-gradient(135deg, var(--color-text-light) 0%, var(--color-text) 100%); - margin-top: var(--spacing-sm); - } - - .btn-small { - padding: var(--spacing-xs) var(--spacing-md); - font-size: 0.8rem; - width: auto; - margin: 0; - } - - /* ===== JOBS LIST ===== */ - .jobs-list { - margin-top: var(--spacing-md); - } - - .job-card { - background: var(--color-surface); - border-radius: var(--border-radius-sm); - padding: var(--spacing-md) var(--spacing-sm); - margin-bottom: var(--spacing-sm); - border-left: 2px solid var(--color-primary); - box-shadow: var(--shadow-sm); - transition: all var(--transition-base); - animation: slideIn 0.4s ease; - } - - @keyframes slideIn { - from { - opacity: 0; - transform: translateX(-20px); - } - to { - opacity: 1; - transform: translateX(0); - } - } - - .job-card:hover { - box-shadow: var(--shadow-sm); - transform: translateX(2px); - } - - .job-header { - display: flex; - justify-content: space-between; - align-items: center; - margin-bottom: var(--spacing-xs); - flex-wrap: wrap; - gap: var(--spacing-xs); - } - - .job-title { - font-weight: 600; - color: var(--color-text); - font-size: 0.875rem; - } - - .job-status { - padding: var(--spacing-xs) var(--spacing-sm); - border-radius: 12px; - font-size: 0.7rem; - font-weight: 600; - text-transform: uppercase; - letter-spacing: 0.3px; - } - - .status-pending { - background: rgba(255, 193, 7, 0.2); - color: #f57c00; - } - .status-processing { - background: rgba(33, 150, 243, 0.2); - color: #1976d2; - } - .status-completed { - background: rgba(76, 175, 80, 0.2); - color: #388e3c; - } - .status-failed { - background: rgba(244, 67, 54, 0.2); - color: #d32f2f; - } - - .progress-bar { - width: 100%; - height: 6px; - background: var(--color-border-light); - border-radius: 3px; - overflow: hidden; - margin: var(--spacing-xs) 0; - position: relative; - } - - .progress-fill { - height: 100%; - background: 
linear-gradient(90deg, var(--color-primary) 0%, var(--color-secondary) 50%, var(--color-accent) 100%); - background-size: 200% 100%; - transition: width var(--transition-base); - animation: progressShimmer 2s infinite; - border-radius: 4px; - } - - @keyframes progressShimmer { - 0% { background-position: 200% 0; } - 100% { background-position: -200% 0; } - } - - .job-actions { - margin-top: var(--spacing-sm); - display: flex; - gap: var(--spacing-xs); - flex-wrap: wrap; - } - - /* ===== FILE LIST ===== */ - .file-list { - margin-top: var(--spacing-lg); - } - - .file-item { - background: var(--color-surface); - padding: var(--spacing-md); - border-radius: var(--border-radius-sm); - margin-bottom: var(--spacing-sm); - display: flex; - justify-content: space-between; - align-items: center; - border: 1px solid var(--color-border-light); - transition: var(--transition-base); - } - - .file-item:hover { - background: rgba(255, 179, 0, 0.05); - border-color: var(--color-secondary); - } - - .btn-remove { - background: #d32f2f; - color: white; - border: none; - padding: var(--spacing-xs) var(--spacing-md); - border-radius: var(--border-radius-sm); - cursor: pointer; - font-size: 0.875rem; - margin-left: var(--spacing-md); - transition: var(--transition-base); - font-weight: 600; - } - - .btn-remove:hover { - background: #b71c1c; - transform: scale(1.05); - } - - /* ===== HISTORY FILTERS ===== */ - .history-filters { - margin-bottom: var(--spacing-md); - display: flex; - gap: var(--spacing-xs); - flex-wrap: wrap; - } - - .filter-btn { - padding: var(--spacing-xs) var(--spacing-md); - border: 1px solid var(--color-border); - background: var(--color-surface); - border-radius: var(--border-radius-sm); - cursor: pointer; - transition: all var(--transition-base); - font-family: var(--font-body); - font-weight: 500; - color: var(--color-text); - font-size: 0.8rem; - } - - .filter-btn:hover { - border-color: var(--color-secondary); - transform: translateY(-1px); - box-shadow: 
var(--shadow-sm); - } - - .filter-btn.active { - border-color: var(--color-secondary); - background: linear-gradient(135deg, rgba(255, 179, 0, 0.1) 0%, rgba(255, 160, 0, 0.15) 100%); - color: var(--color-primary); - font-weight: 600; - } - - /* ===== PREVIEW CONTENT ===== */ - .preview-content { - background: linear-gradient(135deg, rgba(255, 255, 255, 0.98) 0%, rgba(250, 250, 250, 0.95) 100%); - padding: var(--spacing-md); - border-radius: var(--border-radius-md); - border: 1px solid rgba(40, 53, 147, 0.1); - margin-top: var(--spacing-md); - line-height: 1.6; - box-shadow: - 0 2px 8px rgba(0, 0, 0, 0.06), - inset 0 1px 0 rgba(255, 255, 255, 0.9); - position: relative; - transition: all var(--transition-base); - overflow: visible; - } - - .preview-content::before { - content: ''; - position: absolute; - top: 0; - left: 0; - right: 0; - height: 2px; - background: linear-gradient(90deg, var(--color-primary), var(--color-secondary), var(--color-accent)); - border-radius: var(--border-radius-md) var(--border-radius-md) 0 0; - z-index: 1; - } - - .preview-content:hover { - box-shadow: - 0 4px 12px rgba(0, 0, 0, 0.1), - inset 0 1px 0 rgba(255, 255, 255, 0.9); - transform: translateY(-1px); - } - - .preview-content h1, .preview-content h2, .preview-content h3, .preview-content h4 { - margin-top: var(--spacing-md); - margin-bottom: var(--spacing-sm); - color: var(--color-primary); - font-family: var(--font-display); - font-weight: 600; - letter-spacing: -0.02em; - } - - .preview-content h1:first-child, - .preview-content h2:first-child, - .preview-content h3:first-child { - margin-top: 0; - } - - .preview-content h1 { - font-size: 1.25rem; - border-bottom: 2px solid var(--color-secondary); - padding-bottom: var(--spacing-sm); - } - - .preview-content h2 { - font-size: 1.125rem; - color: var(--color-primary); - } - - .preview-content h3 { - font-size: 1rem; - } - - .preview-content p { - margin-bottom: var(--spacing-sm); - line-height: 1.6; - letter-spacing: 0.01em; - } - - 
.preview-content pre { - background: linear-gradient(135deg, rgba(40, 53, 147, 0.08) 0%, rgba(40, 53, 147, 0.05) 100%); - padding: var(--spacing-md); - border-radius: var(--border-radius-sm); - overflow-x: auto; - border-left: 2px solid var(--color-secondary); - margin: var(--spacing-sm) 0; - position: relative; - box-shadow: inset 0 1px 4px rgba(0, 0, 0, 0.04); - } - - .preview-content pre::-webkit-scrollbar { - height: 8px; - } - - .preview-content pre::-webkit-scrollbar-track { - background: rgba(40, 53, 147, 0.05); - border-radius: 4px; - } - - .preview-content pre::-webkit-scrollbar-thumb { - background: var(--color-secondary); - border-radius: 4px; - border: 1px solid transparent; - background-clip: padding-box; - } - - .preview-content pre::-webkit-scrollbar-thumb:hover { - background: var(--color-accent); - } - - .preview-content code { - background: rgba(40, 53, 147, 0.1); - padding: 2px 6px; - border-radius: 3px; - font-family: 'Courier New', monospace; - font-size: 0.9em; - } - - .preview-content pre code { - background: transparent; - padding: 0; - } - - .preview-content ul, .preview-content ol { - margin: var(--spacing-sm) 0; - padding-left: var(--spacing-lg); - } - - .preview-content li { - margin-bottom: var(--spacing-xs); - line-height: 1.6; - } - - .preview-content blockquote { - border-left: 2px solid var(--color-secondary); - padding-left: var(--spacing-md); - margin: var(--spacing-sm) 0; - color: var(--color-text-light); - font-style: italic; - } - - .preview-content hr { - border: none; - border-top: 1px solid var(--color-border); - margin: var(--spacing-md) 0; - position: relative; - } - - .preview-content hr::after { - content: ''; - position: absolute; - top: -1px; - left: 0; - width: 60px; - height: 2px; - background: linear-gradient(90deg, var(--color-secondary), transparent); - } - - .preview-content strong { - font-weight: 600; - color: var(--color-text); - } - - .preview-content a { - color: var(--color-primary); - text-decoration: 
none; - border-bottom: 1px solid transparent; - transition: var(--transition-base); - } - - .preview-content a:hover { - border-bottom-color: var(--color-primary); - } - - .preview-text-plain { - white-space: pre-wrap; - word-wrap: break-word; - word-break: break-word; - font-family: var(--font-body); - line-height: 1.9; - letter-spacing: 0.01em; - color: var(--color-text); - padding: var(--spacing-md) 0; - } - - /* ===== FOOTER ===== */ - .main-footer { - background: linear-gradient(135deg, var(--color-primary-dark) 0%, var(--color-primary) 100%); - color: rgba(255, 255, 255, 0.9); - padding: var(--spacing-md) var(--spacing-lg); - margin-top: var(--spacing-md); - position: relative; - overflow: hidden; - } - - .main-footer::before { - content: ''; - position: absolute; - top: 0; - left: 0; - right: 0; - height: 1px; - background: linear-gradient(90deg, transparent, rgba(255, 179, 0, 0.5), transparent); - } - - .footer-content { - max-width: 1200px; - margin: 0 auto; - display: grid; - grid-template-columns: repeat(auto-fit, minmax(150px, 1fr)); - gap: var(--spacing-md); - } - - .footer-section h4 { - font-family: var(--font-display); - color: var(--color-secondary); - font-size: 0.9rem; - margin-bottom: var(--spacing-xs); - font-weight: 600; - } - - .footer-section p, - .footer-section li { - color: rgba(255, 255, 255, 0.8); - font-size: 0.75rem; - line-height: 1.5; - margin-bottom: var(--spacing-xs); - } - - .footer-section ul { - list-style: none; - padding: 0; - } - - .footer-section a { - color: rgba(255, 255, 255, 0.9); - text-decoration: none; - transition: var(--transition-base); - display: inline-block; - } - - .footer-section a:hover { - color: var(--color-secondary); - transform: translateX(4px); - } - - .footer-bottom { - max-width: 1200px; - margin: var(--spacing-md) auto 0; - padding-top: var(--spacing-sm); - border-top: 1px solid rgba(255, 255, 255, 0.1); - text-align: center; - color: rgba(255, 255, 255, 0.7); - font-size: 0.75rem; - } - - /* ===== 
RESPONSIVIDADE ===== */ - @media (max-width: 768px) { - .navbar { - padding: var(--spacing-xs) var(--spacing-sm); - height: 50px; - } - - .navbar-nav { - position: fixed; - top: 50px; - left: 0; - right: 0; - background: rgba(26, 35, 126, 0.98); - backdrop-filter: blur(20px); - flex-direction: column; - padding: var(--spacing-md); - gap: var(--spacing-sm); - transform: translateX(-100%); - transition: var(--transition-base); - box-shadow: var(--shadow-md); - } - - .navbar-nav.active { - transform: translateX(0); - } - - .mobile-menu-toggle { - display: block; - } - - .container { - padding: var(--spacing-sm); - } - - .page { - padding: var(--spacing-sm); - border-radius: var(--border-radius-sm); - height: calc(100vh - 90px); - } - - .option-cards { - grid-template-columns: 1fr; - gap: var(--spacing-sm); - } - - .options { - grid-template-columns: 1fr; - gap: var(--spacing-sm); - } - - .footer-content { - grid-template-columns: 1fr; - gap: var(--spacing-md); - } - - .job-header { - flex-direction: column; - align-items: flex-start; - } - } - - @media (min-width: 769px) and (max-width: 1024px) { - .footer-content { - grid-template-columns: repeat(2, 1fr); - gap: var(--spacing-md); - } - } - - /* ===== ACESSIBILIDADE ===== */ - *:focus-visible { - outline: 3px solid var(--color-secondary); - outline-offset: 2px; - } - - @media (prefers-reduced-motion: reduce) { - *, - *::before, - *::after { - animation-duration: 0.01ms !important; - animation-iteration-count: 1 !important; - transition-duration: 0.01ms !important; - } - } + :root { --bg:#f5f1ea; --card:rgba(255,255,255,.9); --ink:#223437; --muted:#66787b; --line:rgba(34,52,55,.12); --accent:#2f7a71; --accent-soft:rgba(47,122,113,.1); --warn:#b85f49; } + * { box-sizing:border-box; margin:0; padding:0; } + body { font-family:'Manrope',sans-serif; color:var(--ink); background:radial-gradient(circle at top left, rgba(47,122,113,.08), transparent 26%), linear-gradient(180deg,#faf7f2 0%,#f2ede6 100%); min-height:100vh; 
padding:20px; } + a { color:inherit; text-decoration:none; } + button,input,select { font:inherit; } + .shell { max-width:1180px; margin:0 auto; } + .topbar,.card,.job,.modal-card { background:var(--card); border:1px solid rgba(255,255,255,.9); box-shadow:0 18px 42px rgba(40,56,60,.08); backdrop-filter:blur(14px); } + .topbar { border-radius:999px; padding:16px 20px; display:flex; justify-content:space-between; gap:16px; align-items:center; margin-bottom:20px; flex-wrap:wrap; } + .brand { display:flex; gap:12px; align-items:center; } + .mark { width:42px; height:42px; border-radius:14px; display:grid; place-items:center; background:linear-gradient(135deg, rgba(47,122,113,.12), rgba(198,125,69,.12)); } + .brand h1,.title { font-family:'Fraunces',serif; letter-spacing:-.03em; } + .brand p,.subtle,.hero p,.field label,.meta,.empty,.error { color:var(--muted); } + .nav { display:flex; gap:8px; flex-wrap:wrap; } + .nav a,.nav button,.filter,.action { border:0; background:transparent; border-radius:999px; padding:10px 14px; cursor:pointer; } + .nav a.active,.nav a:hover,.nav button:hover,.filter.active { background:var(--accent-soft); color:var(--accent); } + .page { display:none; } + .page.active { display:block; } + .hero { display:grid; grid-template-columns:1.1fr .9fr; gap:18px; } + .card { border-radius:28px; padding:28px; } + .eyebrow { display:inline-block; background:var(--accent-soft); color:var(--accent); padding:8px 12px; border-radius:999px; font-size:.85rem; font-weight:700; } + .hero h2 { font-family:'Fraunces',serif; font-size:clamp(2rem,4vw,3.2rem); line-height:1.04; margin:16px 0; letter-spacing:-.04em; } + .hero p { line-height:1.7; max-width:56ch; } + .notes { display:grid; grid-template-columns:repeat(3,1fr); gap:10px; margin-top:22px; } + .note { border:1px solid var(--line); border-radius:18px; padding:14px; background:rgba(255,255,255,.6); } + .note strong { display:block; margin-bottom:6px; } + .stack { display:flex; flex-direction:column; 
gap:16px; } + .field { display:flex; flex-direction:column; gap:8px; } + .field label { font-size:.88rem; font-weight:700; } + .field input,.field select { width:100%; padding:14px 16px; border-radius:16px; border:1px solid var(--line); background:rgba(255,255,255,.95); } + .upload { padding:24px; border-radius:24px; border:1.5px dashed rgba(47,122,113,.28); background:linear-gradient(180deg, rgba(223,240,236,.55), rgba(255,255,255,.75)); cursor:pointer; } + .upload.dragover { transform:translateY(-1px); } + .file-list { display:grid; gap:10px; } + .file { display:grid; grid-template-columns:1fr auto auto; gap:10px; align-items:center; padding:12px 14px; border-radius:16px; background:#fff; border:1px solid var(--line); } + .remove { width:34px; height:34px; border-radius:999px; border:0; background:rgba(184,95,73,.1); color:var(--warn); cursor:pointer; } + .mode-grid { display:grid; grid-template-columns:repeat(2,1fr); gap:10px; } + .mode { border:1px solid var(--line); border-radius:20px; padding:16px; cursor:pointer; background:rgba(255,255,255,.7); } + .mode.selected { border-color:rgba(47,122,113,.35); background:linear-gradient(180deg, rgba(223,240,236,.8), rgba(255,255,255,.9)); } + .mode input { display:none; } + .mode strong { display:block; margin-bottom:6px; } + .row { display:grid; grid-template-columns:1fr 220px; gap:12px; align-items:end; } + .btn { border:0; border-radius:16px; padding:14px 16px; background:linear-gradient(135deg,var(--accent),#225b55); color:#fff; font-weight:700; cursor:pointer; } + .btn:disabled { opacity:.65; cursor:not-allowed; } + .section { margin-top:20px; } + .header { display:flex; justify-content:space-between; gap:14px; align-items:end; margin-bottom:14px; } + .list { display:grid; gap:12px; } + .job { border-radius:22px; padding:20px; } + .job-head { display:flex; justify-content:space-between; gap:12px; align-items:flex-start; } + .job-title { font-weight:700; line-height:1.45; } + .chips,.actions,.filters { 
display:flex; gap:8px; flex-wrap:wrap; } + .chip,.status { border-radius:999px; padding:8px 12px; font-size:.82rem; } + .chip { background:rgba(34,52,55,.06); color:var(--muted); } + .status.pending,.status.processing { background:rgba(198,125,69,.14); color:#8f5a2d; } + .status.completed { background:rgba(47,122,113,.14); color:var(--accent); } + .status.failed,.status.interrupted { background:rgba(184,95,73,.12); color:var(--warn); } + .bar { height:8px; background:rgba(34,52,55,.08); border-radius:999px; overflow:hidden; margin-top:14px; } + .bar span { display:block; height:100%; background:linear-gradient(90deg,var(--accent),#c67d45); } + .actions,.filters { margin-top:14px; } + .action,.filter { border:1px solid var(--line); background:rgba(255,255,255,.9); } + .empty { padding:28px; border:1px dashed var(--line); border-radius:22px; text-align:center; background:rgba(255,255,255,.45); } + .error { margin-top:12px; color:var(--warn); } + .footer { text-align:center; font-size:.84rem; padding:22px 0 10px; color:var(--muted); } + .modal { position:fixed; inset:0; background:rgba(26,34,37,.48); display:none; align-items:center; justify-content:center; padding:24px; } + .modal.open { display:flex; } + .modal-card { width:min(900px,100%); max-height:85vh; overflow:auto; border-radius:24px; padding:24px; } + .modal-top { display:flex; justify-content:space-between; gap:12px; margin-bottom:16px; } + .close { width:40px; height:40px; border-radius:999px; border:0; background:rgba(34,52,55,.08); cursor:pointer; } + .preview { white-space:pre-wrap; line-height:1.7; } + .preview h1,.preview h2,.preview h3 { font-family:'Fraunces',serif; margin:16px 0 8px; } + .preview hr { border:0; border-top:1px solid var(--line); margin:18px 0; } + @media (max-width:980px) { .hero,.row { grid-template-columns:1fr; } .notes { grid-template-columns:1fr; } } + @media (max-width:640px) { body { padding:14px; } .mode-grid { grid-template-columns:1fr; } .job-head,.header { 
flex-direction:column; align-items:flex-start; } } </style> </head> <body> - <!-- Menu de Navegação --> - <nav class="navbar"> - <a href="#" class="navbar-brand" onclick="showPage('process'); return false;">🎧 Lazier</a> - <button class="mobile-menu-toggle" onclick="toggleMobileMenu()" aria-label="Toggle menu">☰</button> - <ul class="navbar-nav" id="navbarNav"> - <li><a href="#" class="active" onclick="showPage('process'); return false;">Processar</a></li> - <li><a href="#" onclick="showPage('history'); return false;">Histórico</a></li> - <li><a href="#" onclick="showPage('downloads'); return false;">Downloads</a></li> - <li><a href="#" onclick="logout(); return false;" style="color: var(--color-secondary);">Sair</a></li> - </ul> - </nav> - - <div class="container"> - <!-- Página: Processar --> - <div id="page-process" class="page active"> - <div class="page-content"> - <h1>🎧 Lazier</h1> - <p class="subtitle">Transcrição e Sumarização de Áudios, Vídeos, Textos e PDFs usando OpenAI API</p> - - <div class="upload-area" id="uploadArea" style="flex-shrink: 0;"> - <div class="upload-icon">📁</div> - <h3>Arraste arquivos aqui ou clique para selecionar</h3> - <p>Suporta: Áudio (mp3, wav, m4a...), Vídeo (mp4, avi, mkv...), PDF, Texto (txt, md, html)</p> - <input type="file" id="fileInput" class="file-input" multiple accept=".mp3,.wav,.m4a,.mp4,.avi,.mkv,.pdf,.txt,.md,.html"> + <div class="shell"> + <header class="topbar"> + <div class="brand"> + <div class="mark">🎧</div> + <div> + <h1>Lazier</h1> + <p>Transcrição ou sumário, sempre em português do Brasil.</p> </div> - - <input type="text" id="urlInput" class="url-input" placeholder="Ou cole uma URL do YouTube ou página web aqui..."> - - <!-- Opções de Processamento --> - <div class="processing-options" style="flex-shrink: 0;"> - <h3>Modo de Processamento</h3> - <div class="option-cards"> - <div class="option-card selected" onclick="selectProcessingMode('both', this)"> - <label> - <input type="radio" name="processingMode" 
value="both" checked> - Transcrever + Sumarizar - </label> - <div class="description">Transcreve e gera sumário completo</div> - </div> - <div class="option-card" onclick="selectProcessingMode('transcribe', this)"> - <label> - <input type="radio" name="processingMode" value="transcribe"> - Apenas Transcrever - </label> - <div class="description">Apenas transcrição do conteúdo</div> + </div> + <nav class="nav"> + <a href="#" class="active" onclick="showPage('process');return false;">Processar</a> + <a href="#" onclick="showPage('history');return false;">Histórico</a> + <a href="#" onclick="showPage('downloads');return false;">Downloads</a> + <button onclick="logout();return false;">Sair</button> + </nav> + </header> + + <section id="page-process" class="page active"> + <div class="hero"> + <div class="card"> + <div class="eyebrow">Fluxo simplificado</div> + <h2>Escolha um modo e receba o resultado final em português.</h2> + <p>Envie arquivos, vídeos, links, páginas, textos ou PDFs. O Lazier detecta o idioma, converte o conteúdo e salva o artefato em uma estrutura de saída mais organizada.</p> + <div class="notes"> + <div class="note"><strong>Um objetivo por vez</strong><span class="meta">Agora o fluxo é transcrever ou resumir.</span></div> + <div class="note"><strong>Saídas mais limpas</strong><span class="meta">Pastas por data e job com nomes previsíveis.</span></div> + <div class="note"><strong>Histórico persistente</strong><span class="meta">Jobs continuam visíveis após reiniciar o servidor.</span></div> + </div> + </div> + + <div class="card stack"> + <div> + <div class="title">Novo processamento</div> + <p class="subtle">A saída final sempre será em português do Brasil.</p> + </div> + <div class="upload" id="uploadArea"><input type="file" id="fileInput" hidden multiple 
accept=".mp3,.wav,.m4a,.aac,.flac,.ogg,.opus,.wma,.3gp,.3g2,.amr,.au,.caf,.mka,.ra,.rm,.spx,.tta,.wv,.mp4,.avi,.mkv,.mov,.wmv,.flv,.webm,.m4v,.asf,.f4v,.m2v,.mts,.m2ts,.ogv,.rmvb,.ts,.vob,.pdf,.txt,.md,.html,.htm"></div> + <div class="field"><label for="urlInput">Ou cole uma URL</label><input type="text" id="urlInput" placeholder="YouTube, TED, Vimeo, página web, artigo, etc."></div> + <div class="field"> + <label>Modo</label> + <div class="mode-grid"> + <div class="mode selected" onclick="selectMode('transcribe',this)"><input type="radio" checked value="transcribe"><strong>Apenas transcrever</strong><span class="meta">Entrega o conteúdo completo em português.</span></div> + <div class="mode" onclick="selectMode('summarize',this)"><input type="radio" value="summarize"><strong>Apenas resumir</strong><span class="meta">Entrega um sumário em português.</span></div> </div> - <div class="option-card" onclick="selectProcessingMode('summarize', this)"> - <label> - <input type="radio" name="processingMode" value="summarize"> - Apenas Sumarizar - </label> - <div class="description">Apenas sumário (textos/PDFs)</div> + </div> + <div class="row"> + <div class="field"> + <label for="formatSelect">Formato de saída</label> + <select id="formatSelect"><option value="docx">DOCX</option><option value="txt">TXT</option><option value="md">Markdown</option><option value="json">JSON</option><option value="pdf">PDF</option></select> </div> + <button class="btn" id="processBtn" onclick="processFiles()">Processar</button> </div> </div> - - <div class="options" style="flex-shrink: 0;"> - <div class="option-group"> - <label>Formato de Saída</label> - <select id="formatSelect"> - <option value="docx">DOCX</option> - <option value="txt">TXT</option> - <option value="md">Markdown</option> - <option value="json">JSON</option> - <option value="pdf">PDF</option> - </select> - </div> - <div class="option-group"> - <label>Idioma</label> - <select id="languageSelect"> - <option value="pt">Português 
(Brasil)</option> - <option value="en">English</option> - </select> + </div> + + <div class="card section"> + <div class="header"> + <div> + <div class="title">Jobs em andamento</div> + <p class="subtle">Acompanhe progresso, visualize conteúdo e faça download dos artefatos.</p> </div> </div> - - <button class="btn" id="processBtn" onclick="processFiles()" style="flex-shrink: 0;">Processar</button> - - <div class="jobs-list" id="jobsList" style="flex: 1; overflow-y: auto; min-height: 0;"></div> + <div class="list" id="jobsList"></div> </div> - </div> - - <!-- Página: Histórico --> - <div id="page-history" class="page"> - <div class="page-content"> - <h1>Histórico de Processamentos</h1> - <p class="subtitle">Visualize todos os seus jobs processados</p> - - <div class="history-filters"> - <button class="filter-btn active" onclick="filterHistory('all')">Todos</button> - <button class="filter-btn" onclick="filterHistory('completed')">Concluídos</button> - <button class="filter-btn" onclick="filterHistory('processing')">Processando</button> - <button class="filter-btn" onclick="filterHistory('failed')">Falhados</button> + </section> + + <section id="page-history" class="page"> + <div class="card"> + <div class="header"> + <div><div class="title">Histórico</div><p class="subtle">Jobs persistidos, inclusive após reinício do servidor.</p></div> + </div> + <div class="filters"> + <button class="filter active" onclick="filterHistory('all',this)">Todos</button> + <button class="filter" onclick="filterHistory('completed',this)">Concluídos</button> + <button class="filter" onclick="filterHistory('processing',this)">Processando</button> + <button class="filter" onclick="filterHistory('failed',this)">Falhados</button> + <button class="filter" onclick="filterHistory('interrupted',this)">Interrompidos</button> </div> - - <div class="jobs-list" id="historyList"></div> + <div class="list" id="historyList"></div> </div> - </div> - - <!-- Página: Downloads --> - <div id="page-downloads" 
class="page"> - <div class="page-content"> - <h1>Downloads Disponíveis</h1> - <p class="subtitle">Acesse rapidamente seus arquivos processados</p> - - <div class="jobs-list" id="downloadsList"></div> + </section> + + <section id="page-downloads" class="page"> + <div class="card"> + <div class="header"> + <div><div class="title">Downloads</div><p class="subtle">Acesso rápido aos arquivos prontos.</p></div> + </div> + <div class="list" id="downloadsList"></div> </div> - </div> - </div> + </section> - <!-- Footer Principal --> - <footer class="main-footer"> + <footer class="footer">Desenvolvido por Pablo Murad · <span id="currentYear"></span></footer> + </div> - - <div class="footer-bottom"> - <p>Desenvolvido com ❤️ por Pablo Murad - <span id="currentYear"></span></p> + <div class="modal" id="previewModal" onclick="closePreview(event)"> + <div class="modal-card" onclick="event.stopPropagation()"> + <div class="modal-top"> + <div><div class="title" id="previewTitle">Pré-visualização</div><p class="subtle">Conteúdo persistido do job selecionado.</p></div> + <button class="close" onclick="closePreview()">✕</button> + </div> + <div class="preview" id="previewContent"></div> </div> - </footer> - + </div> + <script> - // Estado da aplicação - let currentPage = 'process'; let selectedFiles = []; - let processingMode = 'both'; + let processingMode = 'transcribe'; let allJobs = []; let currentFilter = 'all'; - - // Inicialização + document.addEventListener('DOMContentLoaded', () => { document.getElementById('currentYear').textContent = new Date().getFullYear(); + renderUpload(); + bindUpload(); loadHistory(); - - // Animações de entrada - const elements = document.querySelectorAll('.page-content > *'); - elements.forEach((el, index) => { - el.style.opacity = '0'; - el.style.transform = 'translateY(20px)'; - setTimeout(() => { - el.style.transition = 'all 0.5s ease'; - el.style.opacity = '1'; - el.style.transform = 'translateY(0)'; - }, index * 100); - }); }); - - // Toggle 
mobile menu - function toggleMobileMenu() { - const nav = document.getElementById('navbarNav'); - nav.classList.toggle('active'); - } - - // Logout - async function logout() { - try { - const response = await fetch('/logout', { - method: 'POST', - credentials: 'include' - }); - if (response.ok || response.redirected) { - window.location.href = '/login'; - } - } catch (error) { - console.error('Erro ao fazer logout:', error); - // Mesmo com erro, redireciona para login - window.location.href = '/login'; + + function renderUpload() { + const area = document.getElementById('uploadArea'); + if (!selectedFiles.length) { + area.innerHTML = '<strong>Arraste arquivos aqui ou clique para selecionar</strong><p class="subtle" style="margin-top:10px;">Suporta áudio, vídeo, PDF, texto e HTML. O resultado final será salvo em português do Brasil.</p>'; + return; } + area.innerHTML = `<div class="file-list">${selectedFiles.map((file, index) => `<div class="file"><div><strong>${escapeHtml(file.name)}</strong><div class="meta">${(file.size / 1024 / 1024).toFixed(2)} MB</div></div><div class="chip">${file.type || 'arquivo'}</div><button class="remove" onclick="removeFile(${index})">✕</button></div>`).join('')}</div>`; + } + + function bindUpload() { + const area = document.getElementById('uploadArea'); + const input = document.getElementById('fileInput'); + area.addEventListener('click', (event) => { if (!event.target.classList.contains('remove')) input.click(); }); + area.addEventListener('dragover', (event) => { event.preventDefault(); area.classList.add('dragover'); }); + area.addEventListener('dragleave', () => area.classList.remove('dragover')); + area.addEventListener('drop', (event) => { event.preventDefault(); area.classList.remove('dragover'); selectedFiles = Array.from(event.dataTransfer.files); renderUpload(); }); + input.addEventListener('change', (event) => { selectedFiles = Array.from(event.target.files); renderUpload(); }); } - - // Navegação + + function 
removeFile(index) { selectedFiles.splice(index, 1); document.getElementById('fileInput').value = ''; renderUpload(); } + function selectMode(mode, element) { processingMode = mode; document.querySelectorAll('.mode').forEach((node) => node.classList.remove('selected')); element.classList.add('selected'); } + async function logout() { try { await fetch('/logout', { method:'POST', credentials:'include' }); } finally { window.location.href = '/login'; } } + function showPage(page) { - document.querySelectorAll('.page').forEach(p => p.classList.remove('active')); - document.querySelectorAll('.navbar-nav a').forEach(a => a.classList.remove('active')); - + document.querySelectorAll('.page').forEach((node) => node.classList.remove('active')); + document.querySelectorAll('.nav a').forEach((node) => node.classList.remove('active')); document.getElementById(`page-${page}`).classList.add('active'); - const navLinks = document.querySelectorAll('.navbar-nav a'); - const pageIndex = ['process', 'history', 'downloads'].indexOf(page); - if (pageIndex >= 0 && navLinks[pageIndex]) { - navLinks[pageIndex].classList.add('active'); - } - - // Fecha menu mobile se estiver aberto - document.getElementById('navbarNav').classList.remove('active'); - - currentPage = page; - - if (page === 'history') { - loadHistory(); - } else if (page === 'downloads') { - loadDownloads(); - } - } - - // Seleção de modo de processamento - function selectProcessingMode(mode, element) { - processingMode = mode; - document.querySelectorAll('.option-card').forEach(card => card.classList.remove('selected')); - if (element) { - element.classList.add('selected'); - } else { - event.currentTarget.classList.add('selected'); - } - document.querySelector(`input[value="${mode}"]`).checked = true; - } - - // Upload e Drag & Drop - const uploadArea = document.getElementById('uploadArea'); - const fileInput = document.getElementById('fileInput'); - - uploadArea.addEventListener('click', () => fileInput.click()); - 
uploadArea.addEventListener('dragover', (e) => { - e.preventDefault(); - uploadArea.classList.add('dragover'); - }); - uploadArea.addEventListener('dragleave', () => { - uploadArea.classList.remove('dragover'); - }); - uploadArea.addEventListener('drop', (e) => { - e.preventDefault(); - uploadArea.classList.remove('dragover'); - selectedFiles = Array.from(e.dataTransfer.files); - updateFileList(); - }); - - fileInput.addEventListener('change', (e) => { - selectedFiles = Array.from(e.target.files); - updateFileList(); - }); - - function updateFileList() { - if (selectedFiles.length > 0) { - const fileListHTML = ` - <div class="file-list"> - ${selectedFiles.map((f, index) => ` - <div class="file-item"> - <span>${f.name}</span> - <span>${(f.size / 1024 / 1024).toFixed(2)} MB</span> - <button onclick="removeFile(${index})" class="btn-remove" title="Remover arquivo">✕</button> - </div> - `).join('')} - </div> - <p style="margin-top: var(--spacing-md); color: var(--color-text-light);">Clique novamente para selecionar outros arquivos</p> - `; - uploadArea.innerHTML = fileListHTML; - // Re-adiciona event listener após atualizar HTML - uploadArea.addEventListener('click', (e) => { - if (!e.target.classList.contains('btn-remove')) { - fileInput.click(); - } - }); - } else { - uploadArea.innerHTML = ` - <div class="upload-icon">📁</div> - <h3>Arraste arquivos aqui ou clique para selecionar</h3> - <p>Suporta: Áudio (mp3, wav, m4a...), Vídeo (mp4, avi, mkv...), PDF, Texto (txt, md, html)</p> - `; - uploadArea.addEventListener('click', () => fileInput.click()); - } - } - - function removeFile(index) { - selectedFiles.splice(index, 1); - updateFileList(); - fileInput.value = ''; + const index = ['process','history','downloads'].indexOf(page); + const links = Array.from(document.querySelectorAll('.nav a')); + if (links[index]) links[index].classList.add('active'); + if (page === 'history') loadHistory(); + if (page === 'downloads') loadDownloads(); } - - // Processamento + async 
function processFiles() { + const button = document.getElementById('processBtn'); const format = document.getElementById('formatSelect').value; const url = document.getElementById('urlInput').value.trim(); - - const transcribe = processingMode === 'both' || processingMode === 'transcribe'; - const summarize = processingMode === 'both' || processingMode === 'summarize'; - - const processBtn = document.getElementById('processBtn'); - processBtn.disabled = true; - processBtn.textContent = 'Processando...'; - + button.disabled = true; + button.textContent = 'Processando...'; try { if (url) { - const response = await fetch('/api/process', { - method: 'POST', - headers: {'Content-Type': 'application/json'}, - body: JSON.stringify({url, format, transcribe, summarize}), - credentials: 'include' - }); + const response = await fetch('/api/process', { method:'POST', headers:{'Content-Type':'application/json'}, body:JSON.stringify({ url, format, mode: processingMode }), credentials:'include' }); const data = await response.json(); - addJob(data.job_id, url, 'URL'); + if (!response.ok) throw new Error(data.detail || 'Falha ao processar URL'); + addJob(data.job_id, url, processingMode); startPolling(data.job_id); - } else if (selectedFiles.length > 0) { + } else if (selectedFiles.length) { const formData = new FormData(); - selectedFiles.forEach(f => formData.append('files', f)); + selectedFiles.forEach((file) => formData.append('files', file)); formData.append('format', format); - formData.append('transcribe', transcribe); - formData.append('summarize', summarize); - - const response = await fetch('/api/upload', { - method: 'POST', - body: formData, - credentials: 'include' - }); + formData.append('mode', processingMode); + const response = await fetch('/api/upload', { method:'POST', body:formData, credentials:'include' }); const data = await response.json(); - - data.job_ids.forEach(jobId => { - addJob(jobId, selectedFiles[0].name, 'Arquivo'); - startPolling(jobId); - }); + if 
(!response.ok) throw new Error(data.detail || 'Falha ao enviar arquivos'); + data.job_ids.forEach((jobId, index) => { addJob(jobId, selectedFiles[index] ? selectedFiles[index].name : 'Arquivo', processingMode); startPolling(jobId); }); } else { - alert('Selecione arquivos ou informe uma URL'); + throw new Error('Selecione arquivos ou informe uma URL.'); } } catch (error) { - console.error('Erro:', error); - alert('Erro ao processar: ' + error.message); + alert(error.message); } finally { - processBtn.disabled = false; - processBtn.textContent = 'Processar'; + button.disabled = false; + button.textContent = 'Processar'; + loadHistory(); } } - - // Gerenciamento de Jobs - function addJob(jobId, name, type) { - const jobCard = document.createElement('div'); - jobCard.className = 'job-card'; - jobCard.id = `job-${jobId}`; - jobCard.innerHTML = ` - <div class="job-header"> - <div class="job-title"> - <strong>${type}:</strong> ${name} - </div> - <span class="job-status status-pending" id="status-${jobId}">Pendente</span> - </div> - <div class="progress-bar"> - <div class="progress-fill" id="progress-${jobId}" style="width: 0%"></div> - </div> - <div id="result-${jobId}"></div> - `; - document.getElementById('jobsList').appendChild(jobCard); + + function renderJob(job) { + const title = job.title || job.url || job.file_path || `Job ${job.id}`; + const statusLabel = { pending:'Pendente', processing:'Processando', completed:'Concluído', failed:'Falhou', interrupted:'Interrompido' }[job.status] || job.status; + return `<div class="job-head"><div><div class="job-title">${escapeHtml(title)}</div><div class="chips" style="margin-top:10px;">${job.mode ? `<span class="chip">${job.mode === 'transcribe' ? 'Transcrição' : 'Sumário'}</span>` : ''}${job.format ? `<span class="chip">${job.format.toUpperCase()}</span>` : ''}${job.created_at ? 
`<span class="chip">${new Date(job.created_at).toLocaleString('pt-BR')}</span>` : ''}</div></div><span class="status ${job.status}">${statusLabel}</span></div><div class="bar"><span style="width:${job.progress || 0}%"></span></div>${job.error ? `<div class="error">${escapeHtml(job.error)}</div>` : ''}${renderActions(job)}`; + } + + function renderActions(job) { + if (job.status !== 'completed' && job.status !== 'interrupted') return ''; + let html = '<div class="actions">'; + if (job.result_path) html += `<a class="action" href="/api/jobs/${job.id}/download">Baixar principal</a>`; + if (job.has_transcription) html += `<a class="action" href="/api/jobs/${job.id}/transcription">Baixar transcrição</a>`; + if (job.has_summary) html += `<a class="action" href="/api/jobs/${job.id}/summary">Baixar sumário</a>`; + html += `<button class="action" onclick="viewJobDetails('${job.id}')">Visualizar</button></div>`; + return html; + } + + function addJob(jobId, title, mode) { + const node = document.createElement('div'); + node.className = 'job'; + node.id = `job-${jobId}`; + node.dataset.title = title; + node.innerHTML = renderJob({ id: jobId, title, mode, status: 'pending', progress: 0, has_transcription: false, has_summary: false }); + document.getElementById('jobsList').prepend(node); } - + + function updateJob(jobId, data) { + const node = document.getElementById(`job-${jobId}`); + if (!node) return; + node.innerHTML = renderJob({ id: jobId, title: node.dataset.title || undefined, ...data }); + } + async function startPolling(jobId) { - const ws = new WebSocket(`ws://${window.location.host}/ws/progress/${jobId}`); - - ws.onmessage = (event) => { - const data = JSON.parse(event.data); - updateJob(jobId, data); - }; - + const protocol = window.location.protocol === 'https:' ? 
'wss' : 'ws'; + let socket = null; + try { + socket = new WebSocket(`${protocol}://${window.location.host}/ws/progress/${jobId}`); + socket.onmessage = (event) => { const data = JSON.parse(event.data); if (data.type !== 'ping') updateJob(jobId, data); }; + } catch (_error) {} const interval = setInterval(async () => { try { - const response = await fetch(`/api/jobs/${jobId}`, { - credentials: 'include' - }); + const response = await fetch(`/api/jobs/${jobId}`, { credentials:'include' }); const data = await response.json(); + if (!response.ok) throw new Error(); updateJob(jobId, data); - - if (data.status === 'completed' || data.status === 'failed') { + if (['completed','failed','interrupted'].includes(data.status)) { clearInterval(interval); - ws.close(); + if (socket) socket.close(); loadHistory(); + loadDownloads(); } - } catch (error) { - console.error('Erro ao verificar status:', error); + } catch (_error) { + clearInterval(interval); + if (socket) socket.close(); } }, 2000); } - - function updateJob(jobId, data) { - const statusEl = document.getElementById(`status-${jobId}`); - const progressEl = document.getElementById(`progress-${jobId}`); - const resultEl = document.getElementById(`result-${jobId}`); - - if (!statusEl) return; - - statusEl.textContent = data.status === 'completed' ? 'Concluído' : - data.status === 'failed' ? 'Falhou' : - data.status === 'processing' ? 
'Processando' : 'Pendente'; - statusEl.className = `job-status status-${data.status}`; - progressEl.style.width = `${data.progress || 0}%`; - - if (data.status === 'completed') { - let actionsHTML = '<div class="job-actions">'; - - if (data.result_path) { - actionsHTML += `<a href="/api/jobs/${jobId}/download" class="btn btn-small">📥 Download Completo</a>`; - } - - if (data.has_transcription) { - actionsHTML += `<a href="/api/jobs/${jobId}/transcription" class="btn btn-small btn-secondary">📄 Download Transcrição</a>`; - } - - if (data.has_summary) { - actionsHTML += `<a href="/api/jobs/${jobId}/summary" class="btn btn-small btn-secondary">📝 Download Sumário</a>`; - } - - actionsHTML += `<button class="btn btn-small btn-secondary" onclick="viewJobDetails('${jobId}')">👁️ Visualizar</button>`; - actionsHTML += '</div>'; - - resultEl.innerHTML = actionsHTML; - } else if (data.status === 'failed') { - resultEl.innerHTML = `<p style="color:#d32f2f; margin-top:var(--spacing-md);">Erro: ${data.error || 'Erro desconhecido'}</p>`; - } - } - - async function viewJobDetails(jobId) { - try { - const response = await fetch(`/api/jobs/${jobId}/details`, { - credentials: 'include' - }); - const data = await response.json(); - - const format = data.format || 'docx'; - - let content = '<div class="preview-content">'; - - if (data.summary) { - content += '<h3>Sumário</h3>'; - const shouldRenderMarkdown = (format === 'md' || format === 'markdown') || isMarkdownContent(data.summary); - - if (shouldRenderMarkdown && typeof marked !== 'undefined') { - content += marked.parse(data.summary); - } else { - content += '<div class="preview-text-plain">' + escapeHtml(data.summary) + '</div>'; - } - content += '<hr>'; - } - - if (data.transcription) { - content += '<h3>Transcrição</h3>'; - const shouldRenderMarkdown = (format === 'md' || format === 'markdown') || isMarkdownContent(data.transcription); - - if (shouldRenderMarkdown && typeof marked !== 'undefined') { - content += 
marked.parse(data.transcription); - } else { - content += '<div class="preview-text-plain">' + escapeHtml(data.transcription) + '</div>'; - } - } - - content += '</div>'; - - document.getElementById(`result-${jobId}`).innerHTML += content; - } catch (error) { - alert('Erro ao carregar detalhes: ' + error.message); - } - } - - function escapeHtml(text) { - if (!text) return ''; - const div = document.createElement('div'); - div.textContent = text; - return div.innerHTML; - } - - function isMarkdownContent(text) { - if (!text) return false; - const markdownPatterns = [ - /^\s*#{1,6}\s+.+$/m, - /\*\*[^*]+\*\*/, - /\*[^*]+\*/, - /^\s*[-*+]\s+.+$/m, - /^\s*\d+\.\s+.+$/m, - /\[.+\]\(.+\)/, - /```[\s\S]*?```/, - /`[^`]+`/, - /^>\s+.+$/m, - /^\|.+\|$/m, - ]; - - return markdownPatterns.some(pattern => pattern.test(text)); - } - - // Histórico + async function loadHistory() { try { - const response = await fetch('/api/history', { - credentials: 'include' - }); + const response = await fetch('/api/history', { credentials:'include' }); const data = await response.json(); allJobs = data.jobs || []; renderHistory(); - } catch (error) { - console.error('Erro ao carregar histórico:', error); + loadDownloads(); + } catch (_error) { + document.getElementById('historyList').innerHTML = '<div class="empty">Não foi possível carregar o histórico.</div>'; } } - - function filterHistory(filter) { + + function filterHistory(filter, button) { currentFilter = filter; - document.querySelectorAll('.filter-btn').forEach(btn => btn.classList.remove('active')); - event.target.classList.add('active'); + document.querySelectorAll('.filter').forEach((node) => node.classList.remove('active')); + button.classList.add('active'); renderHistory(); } - + function renderHistory() { - const historyList = document.getElementById('historyList'); - historyList.innerHTML = ''; - - let filteredJobs = allJobs; - if (currentFilter !== 'all') { - filteredJobs = allJobs.filter(job => job.status === currentFilter); 
- } - - if (filteredJobs.length === 0) { - historyList.innerHTML = '<p style="text-align:center; color:var(--color-text-light); padding:var(--spacing-3xl);">Nenhum job encontrado</p>'; - return; - } - - filteredJobs.reverse().forEach(job => { - const jobCard = document.createElement('div'); - jobCard.className = 'job-card'; - jobCard.innerHTML = ` - <div class="job-header"> - <div class="job-title"> - ${job.title || job.url || job.file_path || 'Job ' + job.id} - </div> - <span class="job-status status-${job.status}">${job.status === 'completed' ? 'Concluído' : job.status === 'failed' ? 'Falhado' : job.status === 'processing' ? 'Processando' : job.status}</span> - </div> - ${job.status === 'completed' ? ` - <div class="job-actions"> - ${job.result_path ? `<a href="/api/jobs/${job.id}/download" class="btn btn-small">📥 Download Completo</a>` : ''} - ${job.has_transcription ? `<a href="/api/jobs/${job.id}/transcription" class="btn btn-small btn-secondary">📄 Transcrição</a>` : ''} - ${job.has_summary ? `<a href="/api/jobs/${job.id}/summary" class="btn btn-small btn-secondary">📝 Sumário</a>` : ''} - </div> - ` : ''} - `; - historyList.appendChild(jobCard); - }); + const container = document.getElementById('historyList'); + const jobs = currentFilter === 'all' ? allJobs : allJobs.filter((job) => job.status === currentFilter); + container.innerHTML = jobs.length ? 
jobs.map((job) => `<div class="job">${renderJob(job)}</div>`).join('') : '<div class="empty">Nenhum job encontrado para este filtro.</div>'; } - - // Downloads + function loadDownloads() { - const downloadsList = document.getElementById('downloadsList'); - downloadsList.innerHTML = ''; - - const completedJobs = allJobs.filter(job => job.status === 'completed'); - - if (completedJobs.length === 0) { - downloadsList.innerHTML = '<p style="text-align:center; color:var(--color-text-light); padding:var(--spacing-3xl);">Nenhum arquivo disponível para download</p>'; - return; + const container = document.getElementById('downloadsList'); + const jobs = allJobs.filter((job) => job.status === 'completed' && job.result_path); + container.innerHTML = jobs.length ? jobs.map((job) => `<div class="job">${renderJob(job)}</div>`).join('') : '<div class="empty">Ainda não há arquivos prontos para download.</div>'; + } + + async function viewJobDetails(jobId) { + try { + const response = await fetch(`/api/jobs/${jobId}/details`, { credentials:'include' }); + const data = await response.json(); + if (!response.ok) throw new Error(data.detail || 'Erro ao carregar detalhes'); + document.getElementById('previewTitle').textContent = data.metadata?.title || `Job ${jobId}`; + let content = ''; + if (data.summary) { content += '<h3>Sumário</h3>' + renderPreview(data.summary, data.format) + '<hr>'; } + if (data.transcription) { content += '<h3>Transcrição</h3>' + renderPreview(data.transcription, data.format); } + document.getElementById('previewContent').innerHTML = content || '<div class="empty">Nenhum conteúdo persistido para este job.</div>'; + document.getElementById('previewModal').classList.add('open'); + } catch (error) { + alert(error.message); } - - completedJobs.reverse().forEach(job => { - const jobCard = document.createElement('div'); - jobCard.className = 'job-card'; - jobCard.innerHTML = ` - <div class="job-header"> - <div class="job-title">${job.url || job.file_path || 'Job ' + 
job.id}</div> - </div> - <div class="job-actions"> - ${job.result_path ? `<a href="/api/jobs/${job.id}/download" class="btn btn-small">📥 Download Completo</a>` : ''} - ${job.has_transcription ? `<a href="/api/jobs/${job.id}/transcription" class="btn btn-small btn-secondary">📄 Download Transcrição</a>` : ''} - ${job.has_summary ? `<a href="/api/jobs/${job.id}/summary" class="btn btn-small btn-secondary">📝 Download Sumário</a>` : ''} - </div> - `; - downloadsList.appendChild(jobCard); - }); } + + function renderPreview(text, format) { + const maybeMarkdown = (format === 'md' || format === 'markdown') || [/^\s*#{1,6}\s+.+$/m, /\*\*[^*]+\*\*/, /^\s*[-*+]\s+.+$/m, /^\s*\d+\.\s+.+$/m, /\[.+\]\(.+\)/, /```[\s\S]*?```/, /`[^`]+`/].some((pattern) => pattern.test(text || '')); + return maybeMarkdown && typeof marked !== 'undefined' ? marked.parse(text) : `<div>${escapeHtml(text)}</div>`; + } + + function closePreview(event) { + if (event && event.target && event.target.id !== 'previewModal') return; + document.getElementById('previewModal').classList.remove('open'); + } + + function escapeHtml(text) { const div = document.createElement('div'); div.textContent = text || ''; return div.innerHTML; } </script> </body> </html> diff --git a/lazier/web/templates/login.html b/lazier/web/templates/login.html @@ -3,260 +3,68 @@ <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> - <title>Login - Lazier</title> - <link rel="icon" href="data:image/svg+xml,<svg xmlns=%22http://www.w3.org/2000/svg%22 viewBox=%220 0 100 100%22><text y=%22.9em%22 font-size=%2290%22>📢</text></svg>"> + <title>Login · Lazier</title> <link rel="preconnect" href="https://fonts.googleapis.com"> <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin> - <link href="https://fonts.googleapis.com/css2?family=Playfair+Display:wght@400;600;700&family=Work+Sans:wght@300;400;500;600;700&display=swap" rel="stylesheet"> + <link 
href="https://fonts.googleapis.com/css2?family=Fraunces:wght@500;700&family=Manrope:wght@400;500;600;700&display=swap" rel="stylesheet"> <style> - * { - margin: 0; - padding: 0; - box-sizing: border-box; - } - - body { - font-family: 'Work Sans', sans-serif; - background: linear-gradient(135deg, #1a237e 0%, #283593 50%, #3949ab 100%); - background-attachment: fixed; - display: flex; - flex-direction: column; - min-height: 100vh; - padding: 0; - position: relative; - overflow-x: hidden; - } - - .login-wrapper { - flex: 1; - display: flex; - justify-content: center; - align-items: center; - padding: 20px; - position: relative; - z-index: 1; - } - - body::before { - content: ''; - position: fixed; - top: 0; - left: 0; - width: 100%; - height: 100%; - background-image: - radial-gradient(circle at 20% 50%, rgba(255, 179, 0, 0.1) 0%, transparent 50%), - radial-gradient(circle at 80% 80%, rgba(255, 160, 0, 0.1) 0%, transparent 50%); - pointer-events: none; - z-index: 0; - } - - .login-container { - background: white; - padding: 48px; - border-radius: 24px; - box-shadow: 0 12px 40px rgba(0,0,0,0.3); - width: 100%; - max-width: 420px; - position: relative; - z-index: 1; - animation: fadeIn 0.5s ease; - } - - @keyframes fadeIn { - from { - opacity: 0; - transform: translateY(20px); - } - to { - opacity: 1; - transform: translateY(0); - } - } - - .login-container::before { - content: ''; - position: absolute; - top: 0; - left: 0; - right: 0; - height: 4px; - background: linear-gradient(90deg, #283593, #ffb300, #ffa000); - border-radius: 24px 24px 0 0; - } - - h1 { - font-family: 'Playfair Display', serif; - color: #283593; - margin-bottom: 8px; - text-align: center; - font-size: 2.5rem; - font-weight: 700; - letter-spacing: -1px; - } - - .subtitle { - text-align: center; - color: #546e7a; - margin-bottom: 32px; - font-size: 0.95rem; - } - - .form-group { - margin-bottom: 24px; - } - - label { - display: block; - margin-bottom: 8px; - color: #263238; - font-weight: 600; - 
font-size: 0.95rem; - } - - input { - width: 100%; - padding: 14px 16px; - border: 2px solid #e0e0e0; - border-radius: 12px; - font-size: 1rem; - font-family: 'Work Sans', sans-serif; - box-sizing: border-box; - transition: all 0.3s ease; - } - - input:focus { - outline: none; - border-color: #ffb300; - box-shadow: 0 0 0 3px rgba(255, 179, 0, 0.1); - } - - button { - width: 100%; - padding: 14px; - background: linear-gradient(135deg, #283593 0%, #3949ab 100%); - color: white; - border: none; - border-radius: 12px; - font-size: 1.125rem; - font-weight: 600; - font-family: 'Work Sans', sans-serif; - cursor: pointer; - margin-top: 8px; - transition: all 0.3s ease; - box-shadow: 0 4px 12px rgba(40, 53, 147, 0.2); - } - - button:hover { - transform: translateY(-2px); - box-shadow: 0 6px 20px rgba(40, 53, 147, 0.3); - } - - button:active { - transform: translateY(0); - } - - .error { - color: #d32f2f; - margin-top: 16px; - text-align: center; - font-size: 0.9rem; - padding: 12px; - background: rgba(211, 47, 47, 0.1); - border-radius: 8px; - border: 1px solid rgba(211, 47, 47, 0.2); - } - - /* ===== FOOTER ===== */ - .main-footer { - background: linear-gradient(135deg, #1a237e 0%, #283593 100%); - color: rgba(255, 255, 255, 0.9); - padding: 16px 24px; - margin-top: auto; - width: 100%; - position: relative; - overflow: hidden; - } - - .main-footer::before { - content: ''; - position: absolute; - top: 0; - left: 0; - right: 0; - height: 1px; - background: linear-gradient(90deg, transparent, rgba(255, 179, 0, 0.5), transparent); - } - - .footer-bottom { - max-width: 1200px; - margin: 0 auto; - padding-top: 8px; - border-top: 1px solid rgba(255, 255, 255, 0.1); - text-align: center; - color: rgba(255, 255, 255, 0.7); - font-size: 0.75rem; - } - - @media (max-width: 480px) { - .login-container { - padding: 32px 24px; - } - - h1 { - font-size: 2rem; - } - - .main-footer { - padding: 12px 16px; - } - } + :root { --bg:#f5f1ea; --card:rgba(255,255,255,.9); --ink:#223437; 
--muted:#66787b; --line:rgba(34,52,55,.12); --accent:#2f7a71; --warn:#b85f49; } + * { box-sizing:border-box; margin:0; padding:0; } + body { min-height:100vh; display:grid; place-items:center; padding:20px; font-family:'Manrope',sans-serif; color:var(--ink); background:radial-gradient(circle at top left, rgba(47,122,113,.08), transparent 26%), linear-gradient(180deg,#faf7f2 0%,#f2ede6 100%); } + .shell { width:min(980px,100%); display:grid; grid-template-columns:1.05fr .95fr; gap:18px; } + .hero,.card { background:var(--card); border:1px solid rgba(255,255,255,.9); box-shadow:0 18px 42px rgba(40,56,60,.08); backdrop-filter:blur(14px); border-radius:28px; } + .hero,.card { padding:28px; } + .hero { display:flex; flex-direction:column; justify-content:space-between; min-height:430px; } + .eyebrow { display:inline-block; background:rgba(47,122,113,.1); color:var(--accent); padding:8px 12px; border-radius:999px; font-size:.85rem; font-weight:700; } + h1,h2 { font-family:'Fraunces',serif; letter-spacing:-.04em; line-height:1.05; } + h1 { font-size:clamp(2.2rem,4vw,3.4rem); margin:18px 0; } + p,.point span,label,.footer { color:var(--muted); } + .point { border:1px solid var(--line); border-radius:18px; padding:14px; background:rgba(255,255,255,.6); margin-top:10px; } + .point strong { display:block; margin-bottom:6px; color:var(--ink); } + .card h2 { font-size:2rem; margin-bottom:8px; } + .card p { margin-bottom:22px; } + .field { display:flex; flex-direction:column; gap:8px; margin-bottom:16px; } + label { font-size:.88rem; font-weight:700; } + input { width:100%; padding:14px 16px; border-radius:16px; border:1px solid var(--line); background:rgba(255,255,255,.95); color:var(--ink); } + button { width:100%; border:0; border-radius:16px; padding:14px 16px; background:linear-gradient(135deg,var(--accent),#225b55); color:#fff; font-weight:700; cursor:pointer; margin-top:6px; } + .error { margin-top:14px; color:var(--warn); background:rgba(184,95,73,.1); border:1px solid 
rgba(184,95,73,.16); border-radius:14px; padding:12px 14px; } + .footer { margin-top:16px; text-align:center; font-size:.84rem; } + @media (max-width:880px) { .shell { grid-template-columns:1fr; } .hero,.card { min-height:auto; padding:22px; } } </style> </head> <body> - <div class="login-wrapper"> - <div class="login-container"> - <h1>🎧 Lazier</h1> - <form method="POST" action="/login"> - <div class="form-group"> - <label for="username">Usuário</label> - <input - type="text" - id="username" - name="username" - required - autofocus - autocomplete="username" - placeholder="Digite seu usuário" - > - </div> - <div class="form-group"> - <label for="password">Senha</label> - <input - type="password" - id="password" - name="password" - required - autocomplete="current-password" - placeholder="Digite sua senha" - > - </div> - <button type="submit">Entrar</button> - </form> + <div> + <div class="shell"> + <section class="hero"> + <div> + <div class="eyebrow">Lazier</div> + <h1>Entrada simples, saída organizada e sempre em português.</h1> + <p>O Lazier recebe mídias, páginas, textos e PDFs, persiste o histórico e mantém o fluxo mais claro: transcrever ou resumir.</p> + </div> + <div> + <div class="point"><strong>Transcrição ou sumário</strong><span>Um objetivo por vez, com resultado mais consistente.</span></div> + <div class="point"><strong>Histórico persistente</strong><span>Seus jobs continuam visíveis após reiniciar o servidor.</span></div> + <div class="point"><strong>Arquivos mais fáceis de achar</strong><span>As saídas ficam agrupadas por data e job.</span></div> + </div> + </section> + <section class="card"> + <h2>Entrar</h2> + <p>Use sua conta para acessar a WebGUI.</p> + <form method="POST" action="/login"> + <div class="field"> + <label for="username">Usuário</label> + <input type="text" id="username" name="username" required autofocus autocomplete="username" placeholder="Digite seu usuário"> + </div> + <div class="field"> + <label for="password">Senha</label> + 
<input type="password" id="password" name="password" required autocomplete="current-password" placeholder="Digite sua senha"> + </div> + <button type="submit">Entrar</button> + </form> + </section> </div> + <div class="footer">Desenvolvido por Pablo Murad · <span id="currentYear"></span></div> </div> - - <!-- Footer Principal --> - <footer class="main-footer"> - <div class="footer-bottom"> - <p>Desenvolvido com ❤️ por Pablo Murad - <span id="currentYear"></span></p> - </div> - </footer> - - <script> - document.addEventListener('DOMContentLoaded', () => { - document.getElementById('currentYear').textContent = new Date().getFullYear(); - }); - </script> + <script>document.getElementById('currentYear').textContent = new Date().getFullYear();</script> </body> </html> diff --git a/tests/__init__.py b/tests/__init__.py @@ -1,2 +1 @@ -# Diretório de testes -# Testes serão implementados em versões futuras +"""Testes do Lazier.""" diff --git a/tests/test_api.py b/tests/test_api.py @@ -0,0 +1,120 @@ +import importlib +import os +import shutil +import unittest +import uuid +from pathlib import Path +from unittest.mock import patch + +try: + from fastapi.testclient import TestClient +except ImportError: # pragma: no cover - ambiente sem stack web + TestClient = None + + +@unittest.skipIf(TestClient is None, "fastapi/testclient não está instalado neste ambiente") +class ApiTests(unittest.TestCase): + def setUp(self): + self.temp_dir = Path(os.getcwd()) / ".tmp-tests" / f"api-{uuid.uuid4().hex[:8]}" + self.temp_dir.mkdir(parents=True, exist_ok=True) + os.environ["OPENAI_API_KEY"] = "test-key" + os.environ["SESSION_SECRET_KEY"] = "test-session-secret" + os.environ["ADMIN_USER"] = "admin" + os.environ["ADMIN_PASSWORD"] = "secret" + os.environ["LAZIER_DATA_DIR"] = str(self.temp_dir / "data") + os.environ["LAZIER_UPLOAD_DIR"] = str(self.temp_dir / "uploads") + os.environ["LAZIER_OUTPUT_DIR"] = str(self.temp_dir / "outputs") + + import lazier.api.main as main_module + + 
self.main_module = importlib.reload(main_module) + self.client = TestClient(self.main_module.create_app()) + self.client.post("/login", data={"username": "admin", "password": "secret"}) + + def tearDown(self): + shutil.rmtree(self.temp_dir, ignore_errors=True) + + def test_process_rejects_legacy_double_mode(self): + response = self.client.post( + "/api/process", + json={"url": "https://example.com/page", "format": "txt", "transcribe": True, "summarize": True}, + ) + + self.assertEqual(response.status_code, 400) + self.assertIn("nao aceita mais os dois modos", response.json()["detail"]) + + def test_upload_persists_job_and_downloads_from_store(self): + output_dir = Path(os.environ["LAZIER_OUTPUT_DIR"]) / "2026" / "03" / "31" / "sample-job" + output_dir.mkdir(parents=True, exist_ok=True) + output_path = output_dir / "transcricao.txt" + output_path.write_text("Transcrição persistida", encoding="utf-8") + + with patch( + "lazier.api.routes.process_source", + return_value={ + "mode": "transcribe", + "input_type": "audio", + "source_name": "sample.mp3", + "metadata": {"title": "Sample"}, + "transcription": "Transcrição persistida", + "summary": None, + "result_path": str(output_path), + "transcription_path": str(output_path), + "summary_path": None, + }, + ): + response = self.client.post( + "/api/upload", + files={"files": ("sample.mp3", b"fake-audio", "audio/mpeg")}, + data={"format": "txt", "mode": "transcribe"}, + ) + + self.assertEqual(response.status_code, 200) + job_id = response.json()["job_ids"][0] + + history = self.client.get("/api/history") + jobs = history.json()["jobs"] + self.assertTrue(any(job["id"] == job_id for job in jobs)) + + download = self.client.get(f"/api/jobs/{job_id}/download") + self.assertEqual(download.status_code, 200) + + def test_history_survives_app_recreation(self): + output_dir = Path(os.environ["LAZIER_OUTPUT_DIR"]) / "2026" / "03" / "31" / "sample-job" + output_dir.mkdir(parents=True, exist_ok=True) + output_path = output_dir / 
"sumario.txt" + output_path.write_text("Resumo persistido", encoding="utf-8") + + with patch( + "lazier.api.routes.process_source", + return_value={ + "mode": "summarize", + "input_type": "text", + "source_name": "https://example.com/article", + "metadata": {"title": "Article"}, + "transcription": "Texto completo em português", + "summary": "Resumo persistido", + "result_path": str(output_path), + "transcription_path": None, + "summary_path": str(output_path), + }, + ): + response = self.client.post( + "/api/process", + json={"url": "https://example.com/article", "format": "txt", "mode": "summarize"}, + ) + + job_id = response.json()["job_id"] + + import lazier.api.main as main_module + + reloaded_main = importlib.reload(main_module) + second_client = TestClient(reloaded_main.create_app()) + second_client.post("/login", data={"username": "admin", "password": "secret"}) + + history = second_client.get("/api/history") + jobs = history.json()["jobs"] + persisted_job = next(job for job in jobs if job["id"] == job_id) + + self.assertEqual(persisted_job["status"], "completed") + self.assertEqual(persisted_job["mode"], "summarize") diff --git a/tests/test_jobs.py b/tests/test_jobs.py @@ -0,0 +1,64 @@ +import os +import shutil +import unittest +import uuid +from datetime import datetime +from pathlib import Path + +from lazier.core.jobs import JobStore, build_job_artifact_path, build_job_output_dir + + +class JobStoreTests(unittest.TestCase): + def setUp(self): + self.temp_dir = Path(os.getcwd()) / ".tmp-tests" / f"jobs-{uuid.uuid4().hex[:8]}" + self.temp_dir.mkdir(parents=True, exist_ok=True) + os.environ["LAZIER_DATA_DIR"] = str(self.temp_dir) + os.environ["LAZIER_OUTPUT_DIR"] = str(self.temp_dir) + self.store = JobStore(self.temp_dir / "jobs.db") + + def tearDown(self): + shutil.rmtree(self.temp_dir, ignore_errors=True) + + def test_persists_and_marks_incomplete_jobs_as_interrupted(self): + self.store.create_job( + { + "id": "job-1", + "mode": "transcribe", + "status": 
"processing", + "progress": 42, + "format": "txt", + "source_name": "arquivo.mp3", + "created_at": "2026-03-31T12:00:00", + } + ) + + interrupted = self.store.mark_incomplete_as_interrupted() + job = self.store.get_job("job-1") + + self.assertEqual(interrupted, 1) + self.assertEqual(job["status"], "interrupted") + self.assertEqual(job["progress"], 42) + self.assertIn("reinicializacao", job["error"]) + + def test_builds_output_paths_with_date_job_and_expected_names(self): + created_at = datetime(2026, 3, 31, 15, 45, 0) + output_dir = build_job_output_dir( + job_id="abc12345-xyz", + source_name="Minha Reunião.mp3", + created_at=created_at, + output_root=self.temp_dir, + ) + summary_path = build_job_artifact_path( + job_id="abc12345-xyz", + source_name="Minha Reunião.mp3", + format_type="txt", + artifact_kind="summary", + created_at=created_at, + output_root=self.temp_dir, + ) + + self.assertIn("2026", str(output_dir)) + self.assertIn("03", str(output_dir)) + self.assertIn("31", str(output_dir)) + self.assertTrue(output_dir.name.startswith("minha-reuniao-abc12345")) + self.assertEqual(summary_path.name, "sumario.txt") diff --git a/tests/test_processing.py b/tests/test_processing.py @@ -0,0 +1,65 @@ +import os +import shutil +import unittest +import uuid +from pathlib import Path +from unittest.mock import patch + +from lazier.core.processing import process_source + + +class ProcessingTests(unittest.TestCase): + def setUp(self): + self.temp_dir = Path(os.getcwd()) / ".tmp-tests" / f"processing-{uuid.uuid4().hex[:8]}" + self.temp_dir.mkdir(parents=True, exist_ok=True) + os.environ["OPENAI_API_KEY"] = "test-key" + os.environ["LAZIER_OUTPUT_DIR"] = str(self.temp_dir) + os.environ["LAZIER_DATA_DIR"] = str(self.temp_dir) + + def tearDown(self): + shutil.rmtree(self.temp_dir, ignore_errors=True) + + def test_audio_transcribe_generates_portuguese_transcription_file(self): + audio_path = self.temp_dir / "sample.mp3" + audio_path.write_bytes(b"fake-audio") + + with 
patch("lazier.core.processing.transcribe_audio", return_value="Hello world"), patch( + "lazier.core.processing.render_text_in_portuguese", return_value="Olá mundo" + ): + result = process_source( + str(audio_path), + mode="transcribe", + output_format="txt", + run_id="job-audio", + source_name="sample.mp3", + output_root=self.temp_dir, + ) + + self.assertEqual(result["transcription"], "Olá mundo") + self.assertTrue(result["result_path"].endswith("transcricao.txt")) + self.assertTrue(Path(result["result_path"]).exists()) + + def test_text_summarize_generates_summary_file(self): + text_path = self.temp_dir / "article.txt" + text_path.write_text("This is a long article in English.", encoding="utf-8") + + with patch( + "lazier.core.processing.render_text_in_portuguese", + return_value="Este é um artigo longo em português.", + ), patch( + "lazier.core.processing.summarize_text", + return_value="Resumo em português.", + ): + result = process_source( + str(text_path), + mode="summarize", + output_format="txt", + run_id="job-text", + source_name="article.txt", + output_root=self.temp_dir, + ) + + self.assertEqual(result["summary"], "Resumo em português.") + self.assertEqual(result["transcription"], "Este é um artigo longo em português.") + self.assertTrue(result["result_path"].endswith("sumario.txt")) + self.assertTrue(Path(result["result_path"]).exists())