Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 2 additions & 21 deletions backend/app/crud/evaluations/langfuse.py
Original file line number Diff line number Diff line change
Expand Up @@ -492,28 +492,9 @@ def fetch_trace_scores_from_langfuse(
)
continue

# 4. Identify complete scores (all traces must have the score)
total_traces = len(traces)
complete_score_names = {
name
for name, data in score_aggregations.items()
if len(data["values"]) == total_traces
}

# 5. Filter trace scores to only include complete scores
for trace in traces:
trace["scores"] = [
score
for score in trace["scores"]
if score["name"] in complete_score_names
]

# 6. Calculate summary scores (only for complete scores)
# 4. Calculate summary scores for all scores that have at least one value
summary_scores = []
for score_name, agg_data in score_aggregations.items():
if score_name not in complete_score_names:
continue

data_type = agg_data["data_type"]
values = agg_data["values"]

Expand Down Expand Up @@ -552,7 +533,7 @@ def fetch_trace_scores_from_langfuse(

logger.info(
f"[fetch_trace_scores_from_langfuse] Successfully fetched scores | "
f"total_traces={len(traces)} | complete_scores={list(complete_score_names)}"
f"total_traces={len(traces)} | score_names={list(score_aggregations.keys())}"
)

return result
Expand Down
8 changes: 6 additions & 2 deletions backend/app/services/evaluations/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,10 @@ def get_evaluation_with_scores(
if not get_trace_info:
return eval_run, None

# Check if we already have cached traces
# Caching strategy: on the first request, trace scores are fetched from Langfuse
# and a copy is stored in S3. Subsequent requests are served from S3, which is
# significantly faster than fetching from Langfuse. Pass resync_score=true to
# bypass the cache, re-fetch from Langfuse, and store the result in S3 again.
has_cached_traces_s3 = eval_run.score_trace_url is not None
has_cached_traces_db = eval_run.score is not None and "traces" in eval_run.score
if not resync_score:
Expand Down Expand Up @@ -276,6 +279,7 @@ def get_evaluation_with_scores(
)
return eval_run, None

# Resync requested — fetch fresh scores from Langfuse
langfuse = get_langfuse_client(
session=session,
org_id=organization_id,
Expand Down Expand Up @@ -321,7 +325,7 @@ def get_evaluation_with_scores(

merged_summary_scores = list(existing_scores_map.values())

# Build final score with merged summary_scores and traces
# Build final score and persist to S3/DB for future cached reads
score = {
"summary_scores": merged_summary_scores,
"traces": langfuse_score.get("traces", []),
Expand Down
Loading