# server/app.py — AE² environment FastAPI server
  1  import sys
  2  import os
  3  
  4  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) # when app is in server/
  5  
  6  
  7  from openenv.core.env_server import create_fastapi_app
  8  from fastapi import FastAPI
  9  from fastapi.responses import JSONResponse
 10  
 11  from models import EngAction, EngObservation
 12  
 13  from environment import EngEnv
 14  from task_loader import TASKS, TESTS
 15  from grader import grader
 16  from pydantic import BaseModel
 17  
 18  # Base app from OpenEnv — gives /ws /reset /step /state /health /web /docs
 19  app = create_fastapi_app(EngEnv,EngAction,EngObservation)# ── Additional required endpoints ────────────────────────────────────────────
 20  
 21  @app.get("/tasks")
 22  def list_tasks():
 23      """
 24      Returns all available tasks and their action schema.
 25      Required by PS: /tasks endpoint.
 26      """
 27      all_tasks = []
 28      for level in ['EASY', 'MEDIUM', 'HARD']:
 29          for task in TASKS[level]:
 30              all_tasks.append({
 31                  "id": task["id"],
 32                  "domain": task["domain"],
 33                  "difficulty": level,
 34                  "name": task["name"],
 35                  "description": task["description"],
 36                  "action_schema": {
 37                      "sol": "str  # Your complete Python solution code"
 38                  }
 39              })
 40      return JSONResponse(content={"tasks": all_tasks, "total": len(all_tasks)})
 41  
 42  
 43      
# NOTE: An earlier query-parameter-based GET /grader endpoint was
# superseded by the POST /grader endpoint below, which accepts the
# solution code in a JSON request body instead.
# Request body schema for POST /grader.
class GraderRequest(BaseModel):
    """JSON body accepted by the POST /grader endpoint."""
    task_id: str  # identifier of the task to grade against (see /tasks)
    code: str  # the candidate's complete Python solution code
 68  
 69  @app.post("/grader")
 70  def get_grader_score(request: GraderRequest):
 71      try:
 72          # 2. CRITICAL: You must use request.code and request.task_id
 73          # If you used 'code' or 'task_id' directly, it caused the 500 error.
 74          result = grader(request.code, request.task_id)
 75          
 76          # 3. Apply the mandatory Phase 2 clamp (strictly between 0 and 1)
 77          raw_score = result.get("grader_score", 0.01)
 78          clamped_score = max(0.01, min(raw_score, 0.99))
 79          clamped_score = float(clamped_score)
 80          # res["grader_score"] = max(0.05, min(float(raw_score), 0.95))
 81          # return JSONResponse(content=res)
 82          
 83          return JSONResponse(content={
 84              "grader_score": clamped_score,
 85              "status": result.get("status", "success"),
 86              "tests_passed": result.get("tests_passed"),
 87              "total_tests": result.get("total_tests")
 88          })
 89      except Exception as e:
 90          # This will show up in your Hugging Face Space Logs
 91          print(f"ERROR IN /grader: {str(e)}")
 92          return JSONResponse(status_code=500, content={"error": str(e)})
 93  
 94  
 95  @app.get("/baseline")
 96  def trigger_baseline():
 97      """
 98      Triggers baseline inference and returns scores.
 99      Required by PS: /baseline endpoint.
100      Note: Full inference.py should be run separately for complete results.
101      Returns pre-computed baseline scores.
102      """
103      baseline_scores = {
104          "EASY": {
105              "avg_score": 0.01,
106              "note": "Run inference.py to generate actual baseline scores"
107          },
108          "MEDIUM": {
109              "avg_score": 0.01,
110              "note": "Run inference.py to generate actual baseline scores"
111          },
112          "HARD": {
113              "avg_score": 0.01,
114              "note": "Run inference.py to generate actual baseline scores"
115          }
116      }
117      
118      # Try to load pre-computed results if available
119      import json
120      import os
121      # results_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "baseline_results.json") # when app is in root 
122      results_path = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))),"baseline_results.json") # when app is in server/
123      if os.path.exists(results_path):
124          with open(results_path) as f:
125              data = json.load(f)
126          return JSONResponse(content={
127              "status": "pre_computed",
128              "model": data.get("model"),
129              "overall_score": data.get("overall"),
130              "scores_by_difficulty": data.get("summary"),
131              "results": data.get("results")
132          })
133      
134      return JSONResponse(content={
135          "status": "not_computed",
136          "message": "Run inference.py first to generate baseline scores",
137          "baseline_scores": baseline_scores
138      })
139  
140  
@app.get("/health")
def health():
    """Liveness/info endpoint: status, task count, and supported domains.

    NOTE(review): create_fastapi_app already registers a /health route
    (per the comment above the app creation) — confirm this override is
    the one actually served, since route registration order matters.
    """
    payload = {
        "status": "ok",
        "environment": "AE2 - Applied AI Engineering Environment",
        "version": "1.0.0",
        "tasks_available": sum(len(TASKS[level]) for level in TASKS),
        "domains": [
            "data_eng",
            "model_ops",
            "nlp_llm",
            "deployment",
            "eval_analysis",
        ],
    }
    return JSONResponse(content=payload)
150  
@app.get("/")
def read_root():
    """
    Landing page for the AE² Environment.
    Prevents 'Detail Not Found' when visiting the base URL.
    """
    info = {
        "name": "ae2-applied-ai-engineering",
        "version": "1.0.1",
        "description": "AE² is a benchmark environment for training and evaluating AI agents on real-world Applied AI Engineering tasks.",
        "status": "active",
        "repository": "https://github.com/sudhanshu-ssd/ae2-env",
    }
    return JSONResponse(content=info)
164  
165  
def main():
    """Main entry point for the server, required by OpenEnv validator."""
    # Local import so merely importing this module never pulls in uvicorn.
    import uvicorn
    # 0.0.0.0:7860 is the conventional Hugging Face Spaces bind address/port;
    # "server.app:app" assumes the repo root is on sys.path (see top of file).
    uvicorn.run("server.app:app", host="0.0.0.0", port=7860, reload=False)

if __name__ == "__main__":
    main()