import gc
import os
import sys
import time

import gradio as gr
import pandas as pd
import torch
from sentence_transformers import SentenceTransformer, util


# Title shown at the top of the Gradio page.
SYSTEM_TITLE = "花蓮慈濟醫院公文輔助判決系統"
# CSV file holding the historical documents; read once at startup.
FILE_PATH = 'data.csv'
# On-disk cache of the corpus embeddings (written with torch.save).
INDEX_FILE = 'corpus_embeddings.pt'

# (username, password) pair passed to Gradio's `auth=` when the app launches.
# Credentials are taken from the APP_USERNAME / APP_PASSWORD environment
# variables so they do not have to live in source control.
# SECURITY / TODO: the hard-coded pair below is kept only so existing
# deployments keep working; rotate it and remove the fallback once the
# environment variables are configured everywhere.
_env_username = os.environ.get("APP_USERNAME")
_env_password = os.environ.get("APP_PASSWORD")
if _env_username and _env_password:
    LOGIN_DATA = (_env_username, _env_password)
else:
    print("⚠️ 警告:未設定 APP_USERNAME / APP_PASSWORD 環境變數,使用內建預設帳密。")
    LOGIN_DATA = ("admin", "htch15583")


print("🚀 正在啟動系統...")

# Without the historical data there is nothing to search, so abort startup.
if not os.path.exists(FILE_PATH):
    print(f"❌ 錯誤:找不到 {FILE_PATH}")
    sys.exit(1)

# Codecs tried in order. The first two are the original attempts, so any file
# that loaded before still loads identically; 'utf-8-sig' is a last resort for
# UTF-8 exports (with or without BOM) that previously ended as an empty table.
_CSV_ENCODINGS = ('cp950', 'big5', 'utf-8-sig')

# `df` stays an empty DataFrame when the file cannot be read; later startup
# steps treat an empty table as "no data" instead of crashing.
df = pd.DataFrame()
for _encoding in _CSV_ENCODINGS:
    try:
        df = pd.read_csv(FILE_PATH, encoding=_encoding)
    except UnicodeDecodeError:
        # Wrong codec for this file: try the next candidate.
        continue
    except Exception as e:
        # Any other failure (empty file, parse error, ...) is not an encoding
        # problem, so retrying with another codec would not help.
        print(f"❌ 讀取 {FILE_PATH} 失敗: {e}")
        break
    else:
        break
else:
    # Loop finished without a successful read or a hard failure.
    print(f"❌ 無法以 {', '.join(_CSV_ENCODINGS)} 任一編碼讀取 {FILE_PATH}")


# ---------------------------------------------------------------------------
# 1. Normalise the raw table into the two columns the search relies on.
# ---------------------------------------------------------------------------
# Canonical column name -> keywords that identify that column in the header.
_COLUMN_KEYWORDS = {
    '主旨': ('主旨', '內容'),
    '收文窗口': ('窗口', '單位'),
}

if not df.empty:
    # Header cells may carry stray whitespace or a BOM character.
    df.columns = [str(c).strip().replace('\ufeff', '') for c in df.columns]

    # Map at most ONE source column to each canonical name. An exact match
    # wins; otherwise the first header containing one of the keywords is used.
    # Renaming every matching column would create duplicate column names, and
    # `df['主旨']` would then return a DataFrame instead of a Series.
    _renames = {}
    for _canonical, _keywords in _COLUMN_KEYWORDS.items():
        if _canonical in df.columns:
            continue
        _match = next(
            (
                c for c in df.columns
                if c not in _renames and any(k in c for k in _keywords)
            ),
            None,
        )
        if _match is not None:
            _renames[_match] = _canonical
    df = df.rename(columns=_renames)
    # Stripping header whitespace can itself produce duplicates; keep the first.
    df = df.loc[:, ~df.columns.duplicated()]

    _missing = [c for c in _COLUMN_KEYWORDS if c not in df.columns]
    if _missing:
        print(f"❌ 資料表缺少必要欄位: {', '.join(_missing)}")
        df = pd.DataFrame()
    else:
        # Drop incomplete rows BEFORE casting to str: astype(str) turns NaN
        # into the literal text 'nan', after which dropna() removes nothing.
        df = (
            df.dropna(subset=['主旨', '收文窗口'])
            .astype({'主旨': str, '收文窗口': str})
            .reset_index(drop=True)
        )

if not df.empty:
    # `corpus[i]` is the subject text of row `i` of `df` (positional order).
    corpus = df['主旨'].tolist()
    total_records = len(corpus)
    print(f"📊 載入全量資料: {total_records} 筆")
else:
    print("❌ 資料表是空的!")
    corpus = []
    total_records = 0

# ---------------------------------------------------------------------------
# 2. Load the sentence-embedding model (stays None on failure).
# ---------------------------------------------------------------------------
model = None
try:
    print("🧠 正在載入模型 (BAAI/bge-small-zh-v1.5)...")
    model = SentenceTransformer('BAAI/bge-small-zh-v1.5')
except Exception as e:
    print(f"❌ 模型載入失敗: {e}")

# ---------------------------------------------------------------------------
# 3. Load the cached embedding index, or compute and cache it.
# ---------------------------------------------------------------------------
# One embedding row per entry of `corpus`; None means "search unavailable".
corpus_embeddings = None

if total_records > 0 and model is not None:
    if os.path.exists(INDEX_FILE):
        print("⚡ 偵測到快取檔案,正在秒速載入...")
        try:
            # NOTE(security): torch.load unpickles the file, so INDEX_FILE
            # must only ever be a file this application wrote itself.
            # map_location keeps the index on the same device as the query
            # embeddings even if the cache was written on another device.
            _cached = torch.load(INDEX_FILE, map_location=model.device)
            if (
                torch.is_tensor(_cached)
                and _cached.dim() == 2
                and _cached.shape[0] == total_records
            ):
                corpus_embeddings = _cached
                print("✅ 索引載入完成!")
            else:
                # The CSV changed since the cache was written; reusing the
                # cache would pair similarity scores with the wrong rows.
                print("⚠️ 快取檔案與目前資料筆數不符,將重新計算。")
        except Exception as e:
            print(f"❌ 快取檔案損壞,將重新計算。錯誤: {e}")
            corpus_embeddings = None

    if corpus_embeddings is None:
        print("🔥 開始計算索引 (需時約 2-4 分鐘,請耐心等候)...")
        chunk_size = 500
        embeddings_chunks = []

        try:
            # Encode in fixed-size chunks so progress can be reported.
            for i in range(0, total_records, chunk_size):
                batch = corpus[i:i + chunk_size]
                batch_emb = model.encode(batch, convert_to_tensor=True, show_progress_bar=False)
                embeddings_chunks.append(batch_emb)
                print(f" -> 已處理 {min(i + chunk_size, total_records)} / {total_records} 筆...")
                gc.collect()

            corpus_embeddings = torch.cat(embeddings_chunks)
        except Exception as e:
            print(f"❌ 索引計算失敗: {e}")
            corpus_embeddings = None

        # Saving is best-effort: a failed write (e.g. read-only disk) must not
        # throw away an index that was computed successfully.
        if corpus_embeddings is not None:
            try:
                torch.save(corpus_embeddings, INDEX_FILE)
                print("✅ 索引計算並儲存完成!")
            except Exception as e:
                print(f"⚠️ 索引已計算完成,但無法寫入快取檔案: {e}")


def search_department(query, top_k=3):
    """Suggest receiving departments for a document subject.

    Embeds ``query`` with the module-level ``model``, compares it against
    ``corpus_embeddings`` by cosine similarity, and formats the best matches
    from ``df`` as a plain-text report.

    Args:
        query: Subject text entered by the user. ``None``, empty and
            whitespace-only values are treated as "no input".
        top_k: Maximum number of historical matches to list (default 3).

    Returns:
        A multi-line string: either the formatted report, a prompt asking for
        input, or an error notice when the index was not built at startup.
    """
    if corpus_embeddings is None:
        return "⚠️ 系統初始化失敗,請檢查 Logs。"

    # Guard against None as well as blank text before calling .strip().
    if not query or not query.strip():
        return "請輸入公文主旨..."

    query_embedding = model.encode(query, convert_to_tensor=True)
    cos_scores = util.cos_sim(query_embedding, corpus_embeddings)[0]

    # Bound k by the number of rows actually indexed: torch.topk raises when
    # k exceeds the length of the score vector.
    k = min(max(int(top_k), 0), cos_scores.shape[0])
    top_results = torch.topk(cos_scores, k=k)

    report = ["🔍 分析結果:", "=" * 30]

    for score_val, idx in zip(top_results.values.tolist(), top_results.indices.tolist()):
        # Skip indices with no matching table row (index larger than table).
        if idx >= len(df):
            continue
        row = df.iloc[idx]

        if score_val > 0.7:
            confidence = "⭐⭐⭐ 極高"
        elif score_val > 0.55:
            confidence = "⭐⭐ 高"
        else:
            confidence = "⭐ 參考"

        report.append(f"【推薦單位】:{row['收文窗口']}")
        report.append(f" - 歷史案例:{row['主旨']}")
        report.append(f" - 相似度:{score_val:.4f} ({confidence})")
        report.append("-" * 20)

    # Every line, including the last, ends with a newline.
    return "\n".join(report) + "\n"


# Health indicator shown under the page title: green only when the embedding
# index was loaded or built successfully at startup.
_status_text = '🟢 系統正常' if corpus_embeddings is not None else '🔴 異常'

# Single-textbox UI: the subject text goes in, the formatted report from
# search_department comes out.
iface = gr.Interface(
    fn=search_department,
    inputs=gr.Textbox(lines=3, placeholder="請輸入公文主旨..."),
    outputs=gr.Textbox(lines=12, label="AI 判決建議"),
    title=SYSTEM_TITLE,
    description=f"系統狀態:{_status_text}\n資料庫收錄:{total_records} 筆歷史資料",
    examples=[
        ["檢送本署彙整人工生殖機構之捐贈生殖細胞使用情形"],
        ["函轉衛生局關於流感疫苗接種計畫"],
    ],
)


if __name__ == "__main__":
    # Start the web UI; Gradio asks for the LOGIN_DATA (username, password)
    # pair before serving the interface.
    iface.launch(auth=LOGIN_DATA)