Spaces:
Running
Running
| import os | |
| import json | |
| import urllib.parse | |
| import requests | |
| import gradio as gr | |
| from typing import Literal, List, Dict | |
# Base URL of the Jina search endpoint; the URL-encoded query is appended as a path segment.
JINA_API_BASE = "https://s.jina.ai"
# For reading page content, r.jina.ai can be used instead, e.g. https://r.jina.ai/https://example.com
| def _headers(): | |
| api_key = os.getenv("JINA_API_KEY") | |
| headers = { | |
| "Accept": "application/json", | |
| "X-Engine": "direct", | |
| } | |
| if api_key: | |
| headers["Authorization"] = f"Bearer {api_key}" | |
| return headers | |
def search(input_query: str, max_results: int = 5) -> List[Dict[Literal["snippet", "title", "link"], str]]:
    """Perform a web search using Jina AI (s.jina.ai).

    Args:
        input_query: The query to search for.
        max_results: The maximum number of results to return. Defaults to 5.
            Numeric non-integers (for example a float coming from a UI
            slider) are truncated to an int. Values below 1 yield an empty
            list without contacting the API. ``None`` applies no limit.

    Returns:
        A list of dictionaries with "snippet", "title", and "link". Entries
        for which all three fields are empty are omitted.

    Raises:
        requests.RequestException: On network failures, timeouts, or an
            error HTTP status (raised via ``raise_for_status``).
    """
    # Normalise the limit up front: a float would make the slice below raise
    # TypeError, and a negative value would silently mean "all but the last k".
    limit = None if max_results is None else int(max_results)
    if limit is not None and limit <= 0:
        return []

    # The query travels as a path segment, so encode every reserved
    # character, including "/" (hence safe="").
    encoded = urllib.parse.quote(input_query, safe="")
    url = f"{JINA_API_BASE}/{encoded}"

    # The headers built by _headers() request a JSON response.
    resp = requests.get(url, headers=_headers(), timeout=30)
    resp.raise_for_status()
    data = resp.json()

    # Accept either a bare list or a dict wrapping the list under "results"
    # (preferred) or "data"; anything else is treated as "no results".
    results = []
    if isinstance(data, dict):
        for key in ("results", "data"):
            candidate = data.get(key)
            if isinstance(candidate, list):
                results = candidate
                break
    elif isinstance(data, list):
        results = data

    items = []
    for entry in results[:limit]:
        if not isinstance(entry, dict):
            # Skip malformed entries instead of crashing on .get().
            continue
        title = str(entry.get("title") or entry.get("headline") or "")
        link = str(entry.get("url") or entry.get("link") or "")
        # Prefer a concise snippet; fall back to longer description/content text.
        snippet = str(
            entry.get("snippet") or entry.get("description") or entry.get("content") or ""
        )
        if link or title or snippet:
            items.append({"title": title, "link": link, "snippet": snippet})
    return items
# Gradio UI: a query textbox and a result-count slider feed search();
# whatever search() returns is rendered in a JSON component.
demo = gr.Interface(
    title="Web Searcher using Jina AI",
    description="Search the web using Jina AI Search Foundation API (s.jina.ai).",
    fn=search,
    inputs=[
        gr.Textbox(label="Search query", value="Ahly SC of Egypt matches."),
        gr.Slider(label="Max results", minimum=1, maximum=20, step=1, value=5),
    ],
    outputs=gr.JSON(label="Search results"),
)

# Launch only when executed as a script, not on import.
# NOTE(review): mcp_server=True should also expose the app as an MCP server -
# confirm against the installed Gradio version.
if __name__ == "__main__":
    demo.launch(mcp_server=True)