Spaces:

AIMaster7
/

Mono

Paused

App Files Files Community

AIMaster7 committed on Jul 4, 2025

Commit

4f72e24

verified ·

1 Parent(s): a302e21

Update main.py

Browse files

Files changed (1) hide show

main.py +40 -28

main.py CHANGED Viewed

@@ -28,7 +28,6 @@ IMAGE_GEN_API_URL = "https://www.chatwithmono.xyz/api/image"
 MODERATION_API_URL = "https://www.chatwithmono.xyz/api/moderation"
 # --- Model Definitions ---
-# Added florence-2-ocr for the new endpoint
 AVAILABLE_MODELS = [
     {"id": "gpt-4-turbo", "object": "model", "created": int(time.time()), "owned_by": "system"},
     {"id": "gpt-4o", "object": "model", "created": int(time.time()), "owned_by": "system"},
@@ -43,10 +42,9 @@ MODEL_ALIASES = {}
 app = FastAPI(
     title="OpenAI Compatible API",
     description="An adapter for various services to be compatible with the OpenAI API specification.",
-    version="1.1.0"
 )
-# Initialize Gradio client for OCR globally to avoid re-initialization on each request
 try:
     ocr_client = Client("multimodalart/Florence-2-l4")
 except Exception as e:
@@ -54,8 +52,7 @@ except Exception as e:
     ocr_client = None
 # --- Pydantic Models ---
-# /v1/chat/completions
 class Message(BaseModel):
     role: str
     content: str
@@ -66,7 +63,6 @@ class ChatRequest(BaseModel):
     stream: Optional[bool] = False
     tools: Optional[Any] = None
-# /v1/images/generations
 class ImageGenerationRequest(BaseModel):
     prompt: str
     aspect_ratio: Optional[str] = "1:1"
@@ -74,12 +70,10 @@ class ImageGenerationRequest(BaseModel):
     user: Optional[str] = None
     model: Optional[str] = "default"
-# /v1/moderations
 class ModerationRequest(BaseModel):
     input: Union[str, List[str]]
     model: Optional[str] = "text-moderation-stable"
-# /v1/ocr
 class OcrRequest(BaseModel):
     image_url: Optional[str] = Field(None, description="URL of the image to process.")
     image_b64: Optional[str] = Field(None, description="Base64 encoded string of the image to process.")
@@ -88,11 +82,9 @@ class OcrRequest(BaseModel):
     @classmethod
     def check_sources(cls, data: Any) -> Any:
         if isinstance(data, dict):
-            url = data.get('image_url')
-            b64 = data.get('image_b64')
-            if not (url or b64):
                 raise ValueError('Either image_url or image_b64 must be provided.')
-            if url and b64:
                 raise ValueError('Provide either image_url or image_b64, not both.')
         return data
@@ -100,10 +92,8 @@ class OcrResponse(BaseModel):
     ocr_text: str
     raw_response: dict
-# --- Helper Function for Random ID Generation ---
 def generate_random_id(prefix: str, length: int = 29) -> str:
-    """Generates a cryptographically secure, random alphanumeric ID."""
     population = string.ascii_letters + string.digits
     random_part = "".join(secrets.choice(population) for _ in range(length))
     return f"{prefix}{random_part}"
@@ -115,6 +105,7 @@ async def list_models():
     """Lists the available models."""
     return {"object": "list", "data": AVAILABLE_MODELS}
 @app.post("/v1/chat/completions", tags=["Chat"])
 async def chat_completion(request: ChatRequest):
     """Handles chat completion requests, supporting streaming and non-streaming."""
@@ -128,7 +119,6 @@ async def chat_completion(request: ChatRequest):
         'user-agent': 'Mozilla/5.0',
     }
-    # Handle tool prompting
     if request.tools:
         tool_prompt = f"""You have access to the following tools. To call a tool, please respond with JSON for a tool call within <tool_call></tool_call> XML tags. Respond in the format {{"name": tool name, "parameters": dictionary of argument name and its value}}. Do not use variables.
 Tools: {";".join(f"<tool>{tool}</tool>" for tool in request.tools)}
@@ -181,7 +171,6 @@ Response Format for tool call:
                                     in_tool_call = False
                                     tool_call_buffer = ""
-                                    # Process text that might come after the tool call in the same chunk
                                     remaining_text = current_buffer.split("</tool_call>", 1)[1]
                                     if remaining_text:
                                         content_piece = remaining_text
@@ -191,16 +180,14 @@ Response Format for tool call:
                                 if "<tool_call>" in content_piece:
                                     in_tool_call = True
                                     tool_call_buffer += content_piece.split("<tool_call>", 1)[1]
-                                    # Process text that came before the tool call
                                     text_before = content_piece.split("<tool_call>", 1)[0]
                                     if text_before:
-                                        # Send the text before the tool call starts
                                         delta = {"content": text_before, "tool_calls": None}
                                         chunk = {"id": chat_id, "object": "chat.completion.chunk", "created": created, "model": model_id,
                                                  "choices": [{"index": 0, "delta": delta, "finish_reason": None}], "usage": None}
                                         yield f"data: {json.dumps(chunk)}\n\n"
                                     if "</tool_call>" not in tool_call_buffer:
-                                        continue # Wait for the closing tag
                                 if not in_tool_call:
                                     delta = {"content": content_piece}
@@ -217,7 +204,6 @@ Response Format for tool call:
                                 except (json.JSONDecodeError, AttributeError): pass
                                 break
-                        # Finalize
                         final_usage = None
                         if usage_info:
                             final_usage = {"prompt_tokens": usage_info.get("promptTokens", 0), "completion_tokens": usage_info.get("completionTokens", 0), "total_tokens": usage_info.get("promptTokens", 0) + usage_info.get("completionTokens", 0)}
@@ -232,7 +218,7 @@ Response Format for tool call:
                 yield "data: [DONE]\n\n"
         return StreamingResponse(event_stream(), media_type="text/event-stream")
-    else: # Non-streaming
         full_response, usage_info = "", {}
         try:
             async with httpx.AsyncClient(timeout=120) as client:
@@ -300,6 +286,8 @@ async def generate_images(request: ImageGenerationRequest):
         return JSONResponse(status_code=500, content={"error": "An internal error occurred.", "details": str(e)})
     return {"created": int(time.time()), "data": results}
 @app.post("/v1/ocr", response_model=OcrResponse, tags=["OCR"])
 async def perform_ocr(request: OcrRequest):
     """
@@ -322,16 +310,40 @@ async def perform_ocr(request: OcrRequest):
         prediction = ocr_client.predict(image=handle_file(image_path), task_prompt="OCR", api_name="/process_image")
-        if not prediction or not isinstance(prediction, tuple):
-             raise HTTPException(status_code=502, detail="Invalid response from OCR service.")
-        raw_result = prediction[0]
-        ocr_text = raw_result.get("OCR", "")
-        return OcrResponse(ocr_text=ocr_text, raw_response=raw_result)
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"An error occurred during OCR processing: {str(e)}")
     finally:
-        if temp_file_path:
             os.unlink(temp_file_path)
 @app.post("/v1/moderations", tags=["Moderation"])

 MODERATION_API_URL = "https://www.chatwithmono.xyz/api/moderation"
 # --- Model Definitions ---
 AVAILABLE_MODELS = [
     {"id": "gpt-4-turbo", "object": "model", "created": int(time.time()), "owned_by": "system"},
     {"id": "gpt-4o", "object": "model", "created": int(time.time()), "owned_by": "system"},
 app = FastAPI(
     title="OpenAI Compatible API",
     description="An adapter for various services to be compatible with the OpenAI API specification.",
+    version="1.1.1" # Incremented version for the fix
 )
 try:
     ocr_client = Client("multimodalart/Florence-2-l4")
 except Exception as e:
     ocr_client = None
 # --- Pydantic Models ---
+# (Pydantic models are unchanged and remain the same as before)
 class Message(BaseModel):
     role: str
     content: str
     stream: Optional[bool] = False
     tools: Optional[Any] = None
 class ImageGenerationRequest(BaseModel):
     prompt: str
     aspect_ratio: Optional[str] = "1:1"
     user: Optional[str] = None
     model: Optional[str] = "default"
 # Request model for moderation calls; presumably the body of the
 # POST /v1/moderations route declared later in this file (confirm at the handler).
 class ModerationRequest(BaseModel):
     # Accepts either one string or a list of strings (both allowed by the annotation).
     input: Union[str, List[str]]
     # Optional model identifier; defaults to "text-moderation-stable" when omitted.
     model: Optional[str] = "text-moderation-stable"
 class OcrRequest(BaseModel):
     image_url: Optional[str] = Field(None, description="URL of the image to process.")
     image_b64: Optional[str] = Field(None, description="Base64 encoded string of the image to process.")
     @classmethod
     def check_sources(cls, data: Any) -> Any:
         if isinstance(data, dict):
+            if not (data.get('image_url') or data.get('image_b64')):
                 raise ValueError('Either image_url or image_b64 must be provided.')
+            if data.get('image_url') and data.get('image_b64'):
                 raise ValueError('Provide either image_url or image_b64, not both.')
         return data
     ocr_text: str
     raw_response: dict
+# --- Helper Function ---
 def generate_random_id(prefix: str, length: int = 29) -> str:
     """Return *prefix* followed by *length* random alphanumeric characters.

     Each character is picked independently with ``secrets.choice`` from the
     ASCII letters and digits, so the suffix is suitable for opaque IDs.

     Args:
         prefix: Text placed verbatim at the start of the ID.
         length: Number of random characters appended (29 by default).

     Returns:
         The prefix immediately followed by the random suffix.
     """
     alphabet = string.ascii_letters + string.digits
     suffix_chars = [secrets.choice(alphabet) for _ in range(length)]
     suffix = "".join(suffix_chars)
     return f"{prefix}{suffix}"
     """Lists the available models."""
     return {"object": "list", "data": AVAILABLE_MODELS}
+# (Chat, Image Generation, and Moderation endpoints are unchanged)
 @app.post("/v1/chat/completions", tags=["Chat"])
 async def chat_completion(request: ChatRequest):
     """Handles chat completion requests, supporting streaming and non-streaming."""
         'user-agent': 'Mozilla/5.0',
     }
     if request.tools:
         tool_prompt = f"""You have access to the following tools. To call a tool, please respond with JSON for a tool call within <tool_call></tool_call> XML tags. Respond in the format {{"name": tool name, "parameters": dictionary of argument name and its value}}. Do not use variables.
 Tools: {";".join(f"<tool>{tool}</tool>" for tool in request.tools)}
                                     in_tool_call = False
                                     tool_call_buffer = ""
                                     remaining_text = current_buffer.split("</tool_call>", 1)[1]
                                     if remaining_text:
                                         content_piece = remaining_text
                                 if "<tool_call>" in content_piece:
                                     in_tool_call = True
                                     tool_call_buffer += content_piece.split("<tool_call>", 1)[1]
                                     text_before = content_piece.split("<tool_call>", 1)[0]
                                     if text_before:
                                         delta = {"content": text_before, "tool_calls": None}
                                         chunk = {"id": chat_id, "object": "chat.completion.chunk", "created": created, "model": model_id,
                                                  "choices": [{"index": 0, "delta": delta, "finish_reason": None}], "usage": None}
                                         yield f"data: {json.dumps(chunk)}\n\n"
                                     if "</tool_call>" not in tool_call_buffer:
+                                        continue
                                 if not in_tool_call:
                                     delta = {"content": content_piece}
                                 except (json.JSONDecodeError, AttributeError): pass
                                 break
                         final_usage = None
                         if usage_info:
                             final_usage = {"prompt_tokens": usage_info.get("promptTokens", 0), "completion_tokens": usage_info.get("completionTokens", 0), "total_tokens": usage_info.get("promptTokens", 0) + usage_info.get("completionTokens", 0)}
                 yield "data: [DONE]\n\n"
         return StreamingResponse(event_stream(), media_type="text/event-stream")
+    else:
         full_response, usage_info = "", {}
         try:
             async with httpx.AsyncClient(timeout=120) as client:
         return JSONResponse(status_code=500, content={"error": "An internal error occurred.", "details": str(e)})
     return {"created": int(time.time()), "data": results}
+# === FIXED OCR Endpoint ===
 @app.post("/v1/ocr", response_model=OcrResponse, tags=["OCR"])
 async def perform_ocr(request: OcrRequest):
     """
         prediction = ocr_client.predict(image=handle_file(image_path), task_prompt="OCR", api_name="/process_image")
+        if not prediction or not isinstance(prediction, tuple) or len(prediction) == 0:
+             raise HTTPException(status_code=502, detail="Invalid or empty response from OCR service.")
+        raw_output = prediction[0]
+        raw_result_dict = {}
+        # --- START: FIX ---
+        # The Gradio client returns a JSON string, not a dict. We must parse it.
+        if isinstance(raw_output, str):
+            try:
+                raw_result_dict = json.loads(raw_output)
+            except json.JSONDecodeError:
+                raise HTTPException(status_code=502, detail="Failed to parse JSON response from OCR service.")
+        elif isinstance(raw_output, dict):
+            # If it's already a dict, use it directly
+            raw_result_dict = raw_output
+        else:
+            raise HTTPException(status_code=502, detail=f"Unexpected data type from OCR service: {type(raw_output)}")
+        # --- END: FIX ---
+        ocr_text = raw_result_dict.get("OCR", "")
+        # Fallback in case the OCR key is missing but there's other data
+        if not ocr_text:
+            ocr_text = str(raw_result_dict)
+        return OcrResponse(ocr_text=ocr_text, raw_response=raw_result_dict)
     except Exception as e:
+        # Catch the specific HTTPException and re-raise it, otherwise wrap other exceptions
+        if isinstance(e, HTTPException):
+            raise e
         raise HTTPException(status_code=500, detail=f"An error occurred during OCR processing: {str(e)}")
     finally:
+        if temp_file_path and os.path.exists(temp_file_path):
             os.unlink(temp_file_path)
 @app.post("/v1/moderations", tags=["Moderation"])