Skip to content

Commit fa8f0d2

Browse files
authored
improv: add api speed log and improve mysql speed
improv: add api speed log and improve mysql speed
2 parents 2eeb21d + fd083eb commit fa8f0d2

8 files changed

Lines changed: 265 additions & 144 deletions

File tree

llm_web_kit/api/dependencies.py

Lines changed: 49 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,27 @@
44
"""
55

66
import logging
7+
import os
8+
from contextvars import ContextVar
79
from functools import lru_cache
10+
from logging.handlers import TimedRotatingFileHandler
811
from typing import Optional
912

1013
from pydantic_settings import BaseSettings, SettingsConfigDict
1114

1215
logger = logging.getLogger(__name__)
1316

17+
# 创建一个 ContextVar 用于存储 request_id,提供默认值
18+
request_id_var: ContextVar[Optional[str]] = ContextVar("request_id", default=None)
19+
20+
21+
class RequestIdFilter(logging.Filter):
    """Logging filter that copies the current request id onto each record.

    The value is read from the ``request_id_var`` context variable, so a
    formatter can reference it as ``%(request_id)s``.
    """

    def filter(self, record):
        """Set ``record.request_id`` from the context variable; never drop the record."""
        current_request_id = request_id_var.get()
        setattr(record, "request_id", current_request_id)
        # Returning True keeps the record: this filter only annotates.
        return True
27+
1428

1529
class Settings(BaseSettings):
1630
"""应用配置设置."""
@@ -27,6 +41,8 @@ class Settings(BaseSettings):
2741

2842
# 日志配置
2943
log_level: str = "INFO"
44+
log_dir: str = "logs"
45+
log_filename: str = "api.log"
3046

3147
# 模型配置
3248
model_path: Optional[str] = None
@@ -38,8 +54,8 @@ class Settings(BaseSettings):
3854

3955
# 数据库配置
4056
database_url: Optional[str] = None # 从环境变量 DATABASE_URL 读取
41-
db_pool_size: int = 5
42-
db_max_overflow: int = 10
57+
db_pool_size: int = 200
58+
db_max_overflow: int = 100
4359

4460
# pydantic v2 配置写法
4561
model_config = SettingsConfigDict(
@@ -57,14 +73,39 @@ def get_settings() -> Settings:
5773
def get_logger(name: str = __name__) -> logging.Logger:
    """Return the logger called ``name``, configured for console and file output.

    The level is taken from ``get_settings().log_level`` on every call.
    Handlers are attached only when the logger has none yet: a
    ``StreamHandler`` and a ``TimedRotatingFileHandler`` writing to
    ``<log_dir>/<log_filename>``, rotated at midnight with 30 backups kept.
    Both use a format containing ``%(request_id)s``, which
    ``RequestIdFilter`` supplies.

    Args:
        name: Logger name passed to ``logging.getLogger``.

    Returns:
        The configured ``logging.Logger``.
    """
    settings = get_settings()
    logger = logging.getLogger(name)
    logger.setLevel(settings.log_level)

    # addFilter() only de-duplicates the very same object, so adding a fresh
    # RequestIdFilter on every call would make logger.filters grow without bound.
    if not any(isinstance(f, RequestIdFilter) for f in logger.filters):
        logger.addFilter(RequestIdFilter())

    if logger.handlers:
        return logger

    formatter = logging.Formatter(
        '%(asctime)s - %(request_id)s - %(name)s - %(levelname)s - %(message)s'
    )

    # Console handler.
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(formatter)
    # Logger-level filters are not applied to records propagated from
    # descendant loggers; filtering on the handler guarantees that every
    # record reaching the formatter carries request_id.
    stream_handler.addFilter(RequestIdFilter())
    logger.addHandler(stream_handler)

    # File handler, rotated daily.
    log_dir = settings.log_dir
    if log_dir:
        # exist_ok avoids the check-then-create race between processes.
        os.makedirs(log_dir, exist_ok=True)
    log_file_path = os.path.join(log_dir, settings.log_filename)

    # NOTE(review): every logger name configured here opens its own handler
    # on the same file; confirm that concurrent midnight rotation is acceptable.
    file_handler = TimedRotatingFileHandler(
        log_file_path,
        when="midnight",   # rotate at midnight each day
        interval=1,
        backupCount=30,    # keep 30 rotated files
        encoding='utf-8',
    )
    file_handler.setFormatter(formatter)
    file_handler.addFilter(RequestIdFilter())
    logger.addHandler(file_handler)

    return logger
69110

70111

llm_web_kit/api/main.py

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,14 @@
44
"""
55

66
import uvicorn
7-
from fastapi import FastAPI
7+
from fastapi import FastAPI, Request
88
from fastapi.middleware.cors import CORSMiddleware
99
from fastapi.responses import JSONResponse
1010

11-
from .dependencies import get_inference_service, get_logger, get_settings
11+
from .dependencies import (get_inference_service, get_logger, get_settings,
12+
request_id_var)
1213
from .routers import htmls
14+
from .services.request_log_service import RequestLogService
1315

1416
settings = get_settings()
1517
logger = get_logger(__name__)
@@ -33,6 +35,30 @@
3335
allow_headers=["*"],
3436
)
3537

38+
39+
@app.middleware("http")
async def request_id_middleware(request: Request, call_next):
    """Bind a request id to the current context for the duration of a request.

    The id is taken from the incoming ``X-Request-ID`` header, or generated
    with ``RequestLogService._generate_request_id()`` when the header is
    missing or empty. It is stored in ``request_id_var`` while the request
    is processed and echoed back in the ``X-Request-ID`` response header.

    Args:
        request: The incoming request.
        call_next: Callable that passes the request on and returns the response.

    Returns:
        The downstream response with ``X-Request-ID`` set.
    """
    request_id = request.headers.get("X-Request-ID")
    if not request_id:
        request_id = RequestLogService._generate_request_id()

    token = request_id_var.set(request_id)
    try:
        response = await call_next(request)
    finally:
        # Always restore the previous value, even when the downstream handler
        # raises; otherwise the id stays set in this context.
        request_id_var.reset(token)

    response.headers["X-Request-ID"] = request_id
    return response
60+
61+
3662
# 注册路由
3763
app.include_router(htmls.router, prefix="/api/v1", tags=["HTML 处理"])
3864

llm_web_kit/api/routers/htmls.py

Lines changed: 63 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,17 @@
33
提供 HTML 解析、内容提取等功能的 API 端点。
44
"""
55

6+
import base64
7+
import html
8+
import time
69
from typing import Optional
710

8-
from fastapi import APIRouter, Depends, File, HTTPException, UploadFile
11+
from fastapi import (APIRouter, BackgroundTasks, Body, Depends, File,
12+
HTTPException, UploadFile)
913
from sqlalchemy.ext.asyncio import AsyncSession
1014

1115
from ..database import get_db_session
12-
from ..dependencies import get_logger, get_settings
16+
from ..dependencies import get_logger, get_settings, request_id_var
1317
from ..models.request import HTMLParseRequest
1418
from ..models.response import HTMLParseResponse
1519
from ..services.html_service import HTMLService
@@ -23,16 +27,20 @@
2327

2428
@router.post('/html/parse', response_model=HTMLParseResponse)
2529
async def parse_html(
26-
request: HTMLParseRequest,
27-
html_service: HTMLService = Depends(HTMLService),
28-
db_session: Optional[AsyncSession] = Depends(get_db_session)
30+
background_tasks: BackgroundTasks,
31+
request: HTMLParseRequest = Body(...),
32+
html_service: HTMLService = Depends(HTMLService),
33+
db_session: Optional[AsyncSession] = Depends(get_db_session)
2934
):
3035
"""解析 HTML 内容.
3136
3237
接收 HTML 字符串并返回解析后的结构化内容。
3338
"""
34-
# 生成请求ID
35-
request_id = RequestLogService.generate_request_id()
39+
# 从 context var 获取 request_id
40+
request_id = request_id_var.get()
41+
decoded_bytes = base64.b64decode(request.html_content)
42+
decoded_str = decoded_bytes.decode('utf-8')
43+
unescaped_html = html.unescape(decoded_str)
3644

3745
# 确定输入类型
3846
if request.html_content:
@@ -43,35 +51,32 @@ async def parse_html(
4351
input_type = 'unknown'
4452

4553
# 创建请求日志
46-
await RequestLogService.create_log(
54+
start_time = time.time()
55+
await RequestLogService.initial_log(
4756
session=db_session,
4857
request_id=request_id,
4958
input_type=input_type,
50-
input_html=request.html_content,
59+
input_html=unescaped_html,
5160
url=request.url,
5261
)
53-
54-
# 立即提交,使 processing 状态在数据库中可见
55-
if db_session:
56-
try:
57-
await db_session.commit()
58-
except Exception as commit_error:
59-
logger.error(f'提交初始日志时出错: {commit_error}')
62+
end_time = time.time()
63+
logger.info(f'创建日志耗时: {end_time - start_time}秒')
6064

6165
try:
62-
logger.info(f'开始解析 HTML [request_id={request_id}],内容长度: {len(request.html_content) if request.html_content else 0}')
66+
logger.info(f'开始解析 HTML,内容长度: {len(unescaped_html) if unescaped_html else 0}')
6367

6468
result = await html_service.parse_html(
65-
html_content=request.html_content,
69+
html_content=unescaped_html,
6670
url=request.url,
71+
request_id=request_id,
6772
options=request.options
6873
)
6974

70-
# 更新日志为成功
71-
await RequestLogService.update_log_success(
72-
session=db_session,
73-
request_id=request_id,
74-
output_markdown=result.get('markdown'),
75+
# 将成功日志更新操作添加到后台任务
76+
background_tasks.add_task(
77+
RequestLogService.log_success_bg,
78+
request_id,
79+
result.get('markdown')
7580
)
7681

7782
return HTMLParseResponse(
@@ -81,37 +86,32 @@ async def parse_html(
8186
request_id=request_id
8287
)
8388
except Exception as e:
84-
logger.error(f'HTML 解析失败 [request_id={request_id}]: {str(e)}')
85-
86-
# 更新日志为失败
87-
await RequestLogService.update_log_failure(
88-
session=db_session,
89-
request_id=request_id,
90-
error_message=str(e),
89+
error_message = str(e)
90+
logger.error(f'HTML 解析失败: {error_message}')
91+
92+
# 将失败日志更新操作添加到后台任务
93+
background_tasks.add_task(
94+
RequestLogService.log_failure_bg,
95+
request_id,
96+
error_message
9197
)
9298

93-
# 手动提交事务,确保失败日志被保存
94-
if db_session:
95-
try:
96-
await db_session.commit()
97-
except Exception as commit_error:
98-
logger.error(f'提交失败日志时出错: {commit_error}')
99-
100-
raise HTTPException(status_code=500, detail=f'HTML 解析失败: {str(e)}')
99+
raise HTTPException(status_code=500, detail=f'HTML 解析失败: {error_message}')
101100

102101

103102
@router.post('/html/upload')
104103
async def upload_html_file(
105-
file: UploadFile = File(...),
106-
html_service: HTMLService = Depends(HTMLService),
107-
db_session: Optional[AsyncSession] = Depends(get_db_session)
104+
background_tasks: BackgroundTasks,
105+
file: UploadFile = File(...),
106+
html_service: HTMLService = Depends(HTMLService),
107+
db_session: Optional[AsyncSession] = Depends(get_db_session)
108108
):
109109
"""上传 HTML 文件进行解析.
110110
111111
支持上传 HTML 文件,自动解析并返回结果。
112112
"""
113-
# 生成请求ID
114-
request_id = RequestLogService.generate_request_id()
113+
# 从 context var 获取 request_id
114+
request_id = request_id_var.get()
115115

116116
try:
117117
if not file.filename.endswith(('.html', '.htm')):
@@ -120,31 +120,26 @@ async def upload_html_file(
120120
content = await file.read()
121121
html_content = content.decode('utf-8')
122122

123-
logger.info(f'上传 HTML 文件 [request_id={request_id}]: {file.filename}, 大小: {len(content)} bytes')
124-
123+
logger.info(f'上传 HTML 文件: {file.filename}, 大小: {len(content)} bytes')
124+
start_time = time.time()
125125
# 创建请求日志
126-
await RequestLogService.create_log(
126+
await RequestLogService.initial_log(
127127
session=db_session,
128128
request_id=request_id,
129129
input_type='file',
130130
input_html=html_content,
131131
url=None,
132132
)
133+
end_time = time.time()
134+
logger.info(f'创建日志耗时: {end_time - start_time}秒')
133135

134-
# 立即提交,使 processing 状态在数据库中可见
135-
if db_session:
136-
try:
137-
await db_session.commit()
138-
except Exception as commit_error:
139-
logger.error(f'提交初始日志时出错: {commit_error}')
140-
141-
result = await html_service.parse_html(html_content=html_content, url="www.baidu.com")
136+
result = await html_service.parse_html(html_content=html_content, url="www.baidu.com", request_id=request_id)
142137

143-
# 更新日志为成功
144-
await RequestLogService.update_log_success(
145-
session=db_session,
146-
request_id=request_id,
147-
output_markdown=result.get('markdown'),
138+
# 将成功日志更新操作添加到后台任务
139+
background_tasks.add_task(
140+
RequestLogService.log_success_bg,
141+
request_id,
142+
result.get('markdown')
148143
)
149144

150145
return HTMLParseResponse(
@@ -154,23 +149,17 @@ async def upload_html_file(
154149
request_id=request_id
155150
)
156151
except Exception as e:
157-
logger.error(f'HTML 文件解析失败 [request_id={request_id}]: {str(e)}')
158-
159-
# 更新日志为失败
160-
await RequestLogService.update_log_failure(
161-
session=db_session,
162-
request_id=request_id,
163-
error_message=str(e),
152+
error_message = str(e)
153+
logger.error(f'HTML 文件解析失败: {error_message}')
154+
155+
# 将失败日志更新操作添加到后台任务
156+
background_tasks.add_task(
157+
RequestLogService.log_failure_bg,
158+
request_id,
159+
error_message
164160
)
165161

166-
# 手动提交事务,确保失败日志被保存
167-
if db_session:
168-
try:
169-
await db_session.commit()
170-
except Exception as commit_error:
171-
logger.error(f'提交失败日志时出错: {commit_error}')
172-
173-
raise HTTPException(status_code=500, detail=f'HTML 文件解析失败: {str(e)}')
162+
raise HTTPException(status_code=500, detail=f'HTML 文件解析失败: {error_message}')
174163

175164

176165
@router.get('/html/status')

0 commit comments

Comments
 (0)