Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion packages/ai-server/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "decoded-ai"
version = "1.5.0"
version = "1.5.1"
description = ""
authors = [
{name = "CIOI",email = "rhkr9693@gmail.com"}
Expand Down
147 changes: 146 additions & 1 deletion packages/ai-server/scripts/backfill_starstyle_posts.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,15 @@ def _env(name: str, *, required: bool = True) -> str:
_SUPABASE_URL = _env("ASSETS_DATABASE_API_URL").rstrip("/")
_SERVICE_ROLE_KEY = _env("ASSETS_DATABASE_SERVICE_ROLE_KEY")

# R2 — starstyle 의 og:image 는 hotlink 보호 + HTTPS→HTTP 리다이렉트 때문에
# admin (HTTPS) 에서 직접 fetch 가 안 됨. 백필 시 R2 에 미러링해서 image_url 을
# R2 public URL 로 저장 → admin 이 vercel proxy 우회.
_R2_ACCOUNT_ID = _env("RAW_POSTS_R2_ACCOUNT_ID", required=False)
_R2_ACCESS_KEY = _env("RAW_POSTS_R2_ACCESS_KEY_ID", required=False)
_R2_SECRET_KEY = _env("RAW_POSTS_R2_SECRET_ACCESS_KEY", required=False)
_R2_BUCKET = _env("RAW_POSTS_R2_BUCKET", required=False) or "raw"
_R2_PUBLIC_URL = (_env("RAW_POSTS_R2_PUBLIC_URL", required=False) or "").rstrip("/")

_INSERT_BATCH = 100

logger = logging.getLogger("backfill_starstyle")
Expand All @@ -89,7 +98,7 @@ def _env(name: str, *, required: bool = True) -> str:
# ---------------------------------------------------------- types & helpers


@dataclass(frozen=True)
@dataclass
class PostData:
post_id: str
slug: str
Expand Down Expand Up @@ -311,6 +320,93 @@ async def fetch_existing_external_ids(http: httpx.AsyncClient) -> Set[str]:
return out


# --------------------------------------------------- R2 mirror (hero image)


def _r2_client():
"""boto3 S3 client for Cloudflare R2. Returns None if not configured."""
if not (_R2_ACCOUNT_ID and _R2_ACCESS_KEY and _R2_SECRET_KEY):
return None
import boto3 # local import — backfill 만 사용
from botocore.client import Config

return boto3.client(
"s3",
endpoint_url=f"https://{_R2_ACCOUNT_ID}.r2.cloudflarestorage.com",
aws_access_key_id=_R2_ACCESS_KEY,
aws_secret_access_key=_R2_SECRET_KEY,
config=Config(signature_version="s3v4"),
region_name="auto",
)


def _build_r2_key(external_id: str) -> str:
"""``starstyle/{shard}/{id}.jpg``. pipeline._build_r2_key 와 같은 형식.

starstyle 의 hero 는 항상 jpg (og:image 의 ``.jpg`` 확장자) — 다양한
포맷을 처리하지 않고 jpg 로 통일. 다른 포맷이 들어오면 그대로 jpg 로 저장
(R2 ContentType 에서 결정, 브라우저는 sniffing).
"""
safe = "".join(c if c.isalnum() else "-" for c in external_id).strip("-") or "x"
shard = safe[:2] or "_"
return f"starstyle/{shard}/{safe}.jpg"


async def mirror_image_to_r2(
http: httpx.AsyncClient,
*,
s3,
image_url: str,
external_id: str,
referer: str,
) -> Optional[str]:
"""upstream 이미지 다운로드 → R2 put → R2 public URL 반환.

실패 시 None — caller 가 image_url 로 upstream URL 그대로 사용.
이미 R2 에 있으면 (HEAD 200) 다운로드 skip.
"""
if s3 is None or not _R2_PUBLIC_URL:
return None
key = _build_r2_key(external_id)
public_url = f"{_R2_PUBLIC_URL}/{key}"
# HEAD 로 존재 확인 — 재실행 시 중복 업로드 방지.
try:
await asyncio.to_thread(s3.head_object, Bucket=_R2_BUCKET, Key=key)
return public_url
except Exception:
pass

try:
resp = await http.get(
image_url,
headers={"User-Agent": _USER_AGENT, "Referer": referer},
follow_redirects=True,
timeout=30,
)
resp.raise_for_status()
data = resp.content
ct = (resp.headers.get("content-type") or "image/jpeg").split(";")[0].strip()
if not ct.startswith("image/"):
logger.warning("mirror: skip %s — bad content-type %s", external_id, ct)
return None
except Exception as exc:
logger.warning("mirror: download failed for %s — %s", external_id, exc)
return None

try:
await asyncio.to_thread(
s3.put_object,
Bucket=_R2_BUCKET,
Key=key,
Body=data,
ContentType=ct,
)
except Exception as exc:
logger.warning("mirror: R2 put failed for %s — %s", external_id, exc)
return None
return public_url


async def ensure_global_feed_source(http: httpx.AsyncClient) -> str:
body = [
{
Expand Down Expand Up @@ -531,6 +627,55 @@ async def _run(args) -> int:
source_id = await ensure_global_feed_source(http)
logger.info(" source_id = %s", source_id)

# R2 mirror — admin (HTTPS) 에서 starstyle 이미지 hotlink 가 막혀
# backfill 시 R2 에 미러링하고 image_url 을 R2 public URL 로 저장.
s3 = _r2_client()
if s3 is not None:
logger.info(
"R2 mirror: bucket=%s public=%s — mirroring %d images",
_R2_BUCKET,
_R2_PUBLIC_URL,
len(posts_new),
)
sem = asyncio.Semaphore(args.concurrency)
mirrored = 0
failed = 0

async def _mirror_one(d: PostData):
nonlocal mirrored, failed
async with sem:
new_url = await mirror_image_to_r2(
http,
s3=s3,
image_url=d.image_url,
external_id=d.post_id,
referer=d.url,
)
if new_url:
d.image_url = new_url
mirrored += 1
else:
failed += 1
if (mirrored + failed) % 50 == 0:
logger.info(
" mirror progress: %d done (%d mirrored, %d failed)",
mirrored + failed,
mirrored,
failed,
)

await asyncio.gather(*(_mirror_one(p) for p in posts_new))
logger.info(
"R2 mirror done: %d mirrored, %d failed (fallback to upstream)",
mirrored,
failed,
)
else:
logger.warning(
"R2 not configured (RAW_POSTS_R2_* env missing) — "
"image_url 은 upstream URL 그대로 (admin preview 깨짐)"
)

dispatch_id = f"backfill-{uuid.uuid4().hex[:12]}"
total_inserted = 0
n_batches = (len(posts_new) + _INSERT_BATCH - 1) // _INSERT_BATCH
Expand Down
2 changes: 1 addition & 1 deletion packages/api-server/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ tokio-test = "0.4"

[package]
name = "decoded-api"
version = "0.10.0"
version = "0.10.1"
edition = "2021"
license = "MIT"
default-run = "decoded-api"
Expand Down
28 changes: 25 additions & 3 deletions packages/api-server/src/app_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,10 @@ pub struct AppState {
pub post_list_cache: Arc<PostListCache>,

// Trait 기반 클라이언트
pub storage_client: Arc<dyn StorageClient>,
pub operation_storage: Arc<dyn StorageClient>,
/// raw_posts 전용 R2 (#466 R2 cleanup) — ai-server 가 hero/thumbnail 을
/// 올리는 별도 버킷. delete_item 시 verify 안 된 raw_post 의 R2 객체 삭제.
pub assets_storage: Arc<dyn StorageClient>,
pub search_client: Arc<dyn SearchClient>,
pub affiliate_client: Arc<dyn AffiliateClient>,
pub embedding_client: Arc<dyn EmbeddingClient>,
Expand Down Expand Up @@ -63,7 +66,7 @@ impl AppState {
let assets_db = Arc::new(config.create_assets_db_connection().await?);
tracing::info!("Assets DB connection established");

let storage_client: Arc<dyn StorageClient> =
let operation_storage: Arc<dyn StorageClient> =
match CloudflareR2Client::new(&config.storage).await {
Ok(client) => {
tracing::info!("CloudflareR2Client initialized successfully");
Expand All @@ -78,6 +81,24 @@ impl AppState {
}
};

// raw_posts 전용 R2 (#466 R2 cleanup) — ai-server 가 쓰는 별도 버킷.
// 미설정 시 dummy 로 fallback (delete cleanup 만 영향, 운영 노이즈 X).
let assets_storage: Arc<dyn StorageClient> =
match CloudflareR2Client::new(&config.assets_storage).await {
Ok(client) => {
tracing::info!("raw_posts CloudflareR2Client initialized successfully");
Arc::new(client)
}
Err(e) => {
tracing::warn!(
"Failed to initialize raw_posts CloudflareR2Client: {}. \
R2 cleanup on raw_post delete will be skipped.",
e
);
Arc::new(crate::services::DummyStorageClient::default())
}
};

let search_client: Arc<dyn SearchClient> = match MeilisearchClient::new(&config.search) {
Ok(client) => {
tracing::info!("MeilisearchClient initialized successfully");
Expand Down Expand Up @@ -156,7 +177,8 @@ impl AppState {
config,
category_cache,
post_list_cache,
storage_client,
operation_storage,
assets_storage,
search_client,
affiliate_client,
embedding_client,
Expand Down
19 changes: 19 additions & 0 deletions packages/api-server/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,10 @@ pub struct AppConfig {
pub assets_database: AssetsDatabaseConfig,
pub auth: AuthConfig,
pub storage: StorageConfig,
/// raw_posts 전용 R2 (#466 R2 cleanup) — ai-server 가 hero/thumbnail 을
/// 업로드하는 별도 버킷. ``RAW_POSTS_R2_*`` env. delete_item 시 verify
/// 안 된 raw_post 의 R2 객체 정리에 사용.
pub assets_storage: StorageConfig,
pub search: SearchConfig,
pub affiliate: AffiliateConfig,
pub ai_service: AiServiceConfig,
Expand Down Expand Up @@ -271,6 +275,21 @@ impl AppConfig {
.unwrap_or_else(|_| "decoded-images".to_string()),
public_url: std::env::var("R2_PUBLIC_URL").unwrap_or_else(|_| String::new()),
},
assets_storage: StorageConfig {
endpoint: std::env::var("RAW_POSTS_R2_ACCOUNT_ID")
.map(|id| format!("https://{}.r2.cloudflarestorage.com", id))
.unwrap_or_else(|_| String::new()),
account_id: std::env::var("RAW_POSTS_R2_ACCOUNT_ID")
.unwrap_or_else(|_| String::new()),
access_key_id: std::env::var("RAW_POSTS_R2_ACCESS_KEY_ID")
.unwrap_or_else(|_| String::new()),
secret_access_key: std::env::var("RAW_POSTS_R2_SECRET_ACCESS_KEY")
.unwrap_or_else(|_| String::new()),
bucket_name: std::env::var("RAW_POSTS_R2_BUCKET")
.unwrap_or_else(|_| "raw".to_string()),
public_url: std::env::var("RAW_POSTS_R2_PUBLIC_URL")
.unwrap_or_else(|_| String::new()),
},
search: SearchConfig {
url: std::env::var("MEILISEARCH_URL")
.unwrap_or_else(|_| "http://localhost:7700".to_string()),
Expand Down
14 changes: 7 additions & 7 deletions packages/api-server/src/domains/posts/service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -291,9 +291,9 @@ pub async fn create_post_without_solutions(
.inspect_err(|_e| {
// Post 생성 실패 시 업로드된 이미지 삭제
let image_key_clone = image_key.clone();
let storage_client = state.storage_client.clone();
let storage = state.operation_storage.clone();
tokio::spawn(async move {
if let Err(delete_err) = storage_client.delete(&image_key_clone).await {
if let Err(delete_err) = storage.delete(&image_key_clone).await {
tracing::warn!(
"Failed to delete orphaned image {}: {}",
image_key_clone,
Expand Down Expand Up @@ -363,9 +363,9 @@ pub async fn create_post_with_solutions(
.inspect_err(|_e| {
// Post 생성 실패 시 업로드된 이미지 삭제
let image_key_clone = image_key.clone();
let storage_client = state.storage_client.clone();
let storage = state.operation_storage.clone();
tokio::spawn(async move {
if let Err(delete_err) = storage_client.delete(&image_key_clone).await {
if let Err(delete_err) = storage.delete(&image_key_clone).await {
tracing::warn!(
"Failed to delete orphaned image {}: {}",
image_key_clone,
Expand Down Expand Up @@ -1734,7 +1734,7 @@ pub async fn upload_image(

// StorageClient를 사용하여 업로드
let image_url = state
.storage_client
.operation_storage
.upload(&key, image_data, content_type)
.await
.map_err(|e| AppError::ExternalService(format!("Failed to upload image: {}", e)))?;
Expand Down Expand Up @@ -2022,9 +2022,9 @@ pub async fn create_try_post(
.inspect_err(|_| {
// 실패 시 업로드된 이미지 삭제
let image_key_clone = image_key.clone();
let storage_client = state.storage_client.clone();
let storage = state.operation_storage.clone();
tokio::spawn(async move {
if let Err(e) = storage_client.delete(&image_key_clone).await {
if let Err(e) = storage.delete(&image_key_clone).await {
tracing::warn!(
"Failed to delete orphaned try image {}: {}",
image_key_clone,
Expand Down
9 changes: 8 additions & 1 deletion packages/api-server/src/domains/raw_posts/handlers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,14 @@ pub async fn delete_item(
State(state): State<AppState>,
Path(id): Path<Uuid>,
) -> AppResult<StatusCode> {
service::delete_item(state.assets_db.as_ref(), id).await?;
service::delete_item(
state.assets_db.as_ref(),
state.db.as_ref(),
state.assets_storage.as_ref(),
&state.config.assets_storage.public_url,
id,
)
.await?;
Ok(StatusCode::NO_CONTENT)
}

Expand Down
Loading