app.py
import sys
import os
import json

# Make the project root importable when app.py lives in server/.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from openenv.core.env_server import create_fastapi_app
from fastapi.responses import JSONResponse
from pydantic import BaseModel

from models import EngAction, EngObservation
from environment import EngEnv
from task_loader import TASKS
from grader import grader

# Base app from OpenEnv: provides /ws, /reset, /step, /state, /health, /web, /docs.
app = create_fastapi_app(EngEnv, EngAction, EngObservation)

# ── Additional required endpoints ────────────────────────────────────────────

@app.get("/tasks")
def list_tasks():
    """
    Return all available tasks and their action schema.
    Required by PS: /tasks endpoint.
    """
    all_tasks = []
    for level in ("EASY", "MEDIUM", "HARD"):
        for task in TASKS[level]:
            all_tasks.append({
                "id": task["id"],
                "domain": task["domain"],
                "difficulty": level,
                "name": task["name"],
                "description": task["description"],
                "action_schema": {
                    "sol": "str  # Your complete Python solution code"
                },
            })
    return JSONResponse(content={"tasks": all_tasks, "total": len(all_tasks)})


class GraderRequest(BaseModel):
    task_id: str
    code: str


@app.post("/grader")
def get_grader_score(request: GraderRequest):
    """
    Grade a submitted solution and return its clamped score.
    Expects a JSON body matching GraderRequest.
    """
    try:
        # Read both fields from the parsed request body, not from bare
        # `code`/`task_id` names; the latter caused 500 errors.
        result = grader(request.code, request.task_id)

        # Phase 2 requires a score strictly between 0 and 1, so clamp
        # into [0.01, 0.99].
        raw_score = result.get("grader_score", 0.01)
        clamped_score = float(max(0.01, min(raw_score, 0.99)))

        return JSONResponse(content={
            "grader_score": clamped_score,
            "status": result.get("status", "success"),
            "tests_passed": result.get("tests_passed"),
            "total_tests": result.get("total_tests"),
        })
    except KeyError:
        return JSONResponse(status_code=404, content={"error": f"Task '{request.task_id}' not found"})
    except Exception as e:
        # Surfaces in the Hugging Face Space logs.
        print(f"ERROR IN /grader: {e}")
        return JSONResponse(status_code=500, content={"error": str(e)})


@app.get("/baseline")
def trigger_baseline():
    """
    Return baseline inference scores.
    Required by PS: /baseline endpoint.
    Full inference (inference.py) should be run separately; this endpoint
    serves pre-computed results from baseline_results.json when present.
    """
    baseline_scores = {
        level: {
            "avg_score": 0.01,
            "note": "Run inference.py to generate actual baseline scores",
        }
        for level in ("EASY", "MEDIUM", "HARD")
    }

    # Load pre-computed results if available.
    # results_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "baseline_results.json")  # when app.py is in root
    results_path = os.path.join(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
        "baseline_results.json",
    )  # when app.py is in server/
    if os.path.exists(results_path):
        with open(results_path) as f:
            data = json.load(f)
        return JSONResponse(content={
            "status": "pre_computed",
            "model": data.get("model"),
            "overall_score": data.get("overall"),
            "scores_by_difficulty": data.get("summary"),
            "results": data.get("results"),
        })

    return JSONResponse(content={
        "status": "not_computed",
        "message": "Run inference.py first to generate baseline scores",
        "baseline_scores": baseline_scores,
    })


@app.get("/health")
def health():
    """
    Health check with environment metadata.
    Note: create_fastapi_app already registers /health; FastAPI serves the
    first matching route, so the base app's handler may take precedence.
    """
    return JSONResponse(content={
        "status": "ok",
        "environment": "AE2 - Applied AI Engineering Environment",
        "version": "1.0.0",
        "tasks_available": sum(len(TASKS[level]) for level in TASKS),
        "domains": ["data_eng", "model_ops", "nlp_llm", "deployment", "eval_analysis"],
    })


@app.get("/")
def read_root():
    """
    Landing page for the AE² environment.
    Prevents the default {"detail": "Not Found"} 404 at the base URL.
    """
    return JSONResponse(content={
        "name": "ae2-applied-ai-engineering",
        "version": "1.0.1",
        "description": "AE² is a benchmark environment for training and evaluating AI agents on real-world Applied AI Engineering tasks.",
        "status": "active",
        "repository": "https://github.com/sudhanshu-ssd/ae2-env",
    })


def main():
    """Main entry point for the server, required by the OpenEnv validator."""
    import uvicorn
    uvicorn.run("server.app:app", host="0.0.0.0", port=7860, reload=False)


if __name__ == "__main__":
    main()