Migaku
committed on
Commit
·
d161ef2
1
Parent(s):
230df25
initial model card & files
Browse files- .ipynb_checkpoints/README-checkpoint.md +73 -0
- .ipynb_checkpoints/config-checkpoint.json +8 -0
- .ipynb_checkpoints/predict_ret_next-checkpoint.py +51 -0
- .ipynb_checkpoints/requirements-checkpoint.txt +6 -0
- README.md +70 -0
- config.json +8 -0
- model.joblib +3 -0
- predict_ret_next.py +51 -0
- requirements.txt +6 -0
.ipynb_checkpoints/README-checkpoint.md
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: apache-2.0
|
| 3 |
+
tags:
|
| 4 |
+
- finance
|
| 5 |
+
- japanese
|
| 6 |
+
- stock-prediction
|
| 7 |
+
- sentence-transformers
|
| 8 |
+
- gradient-boosting
|
| 9 |
+
datasets:
|
| 10 |
+
- kabu-disclosures-v1
|
| 11 |
+
metrics:
|
| 12 |
+
- mean_absolute_error
|
| 13 |
+
- r2
|
| 14 |
+
model-index:
|
| 15 |
+
- name: jfinance-title2return-v1
|
| 16 |
+
results:
|
| 17 |
+
- task:
|
| 18 |
+
type: regression
|
| 19 |
+
name: Stock Return Regression
|
| 20 |
+
dataset:
|
| 21 |
+
name: JP_Disclosure_Titles_2024Q3-2025Q2
|
| 22 |
+
type: custom
|
| 23 |
+
metrics:
|
| 24 |
+
- type: mean_absolute_error
|
| 25 |
+
value: 2.94
|
| 26 |
+
- type: r2
|
| 27 |
+
value: -0.064
|
| 28 |
+
---
|
| 29 |
+
|
| 30 |
+
# jfinance-title2return-v1
|
| 31 |
+
|
| 32 |
+
日本株 **TDnet/EDINET 開示タイトル** から
|
| 33 |
+
**翌営業日リターン (`ret_next`, %)** を推定する Gradient Boosting Regressor モデルです。
|
| 34 |
+
タイトル文は [`sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2`](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2)
|
| 35 |
+
(384 次元)にエンコードし、±25 % Winsorize 済みリターンを回帰ターゲットに学習しました。
|
| 36 |
+
|
| 37 |
+
---
|
| 38 |
+
|
| 39 |
+
## Intended Uses & Limitations
|
| 40 |
+
|
| 41 |
+
| ✔ 推奨用途 | ✘ 非推奨用途 |
|
| 42 |
+
|-----------|-------------|
|
| 43 |
+
| IR イベントドリブンの速報スクリーニング | 裁量なしの完全自動売買 |
|
| 44 |
+
| 金融 NLP 研究・ベンチマーク | ETF/REIT での厳密な値動き予測 |
|
| 45 |
+
| 個人投資家の材料整理 | 終値 < 1 円の超低位株 |
|
| 46 |
+
|
| 47 |
+
> **注意**: 実際の投資判断に用いる際は必ず追加検証を行ってください。
|
| 48 |
+
|
| 49 |
+
---
|
| 50 |
+
|
| 51 |
+
## Quick inference (one-liner)
|
| 52 |
+
|
| 53 |
+
```python
|
| 54 |
+
from joblib import load
|
| 55 |
+
from sentence_transformers import SentenceTransformer
|
| 56 |
+
from huggingface_hub import hf_hub_download
|
| 57 |
+
|
| 58 |
+
# download & load model
|
| 59 |
+
reg = load(hf_hub_download("c299m/jfinance-title2return-v1", "model.joblib"))
|
| 60 |
+
embed = SentenceTransformer("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
|
| 61 |
+
|
| 62 |
+
title = "AIマッチングサービスβ版リリースのお知らせ"
|
| 63 |
+
pred = reg.predict(embed.encode([title]))[0]
|
| 64 |
+
print(f"Predicted next-day return: {pred:.2f} %")
```
|
| 65 |
+
|
| 66 |
+
### Run with the helper script
|
| 67 |
+
|
| 68 |
+
```bash
|
| 69 |
+
python predict_ret_next.py "NVIDIAと提携"
|
| 70 |
+
|
| 71 |
+
#▶ loading models …
|
| 72 |
+
#予測翌営業日リターン: 15.04 %
```
|
| 73 |
+
|
.ipynb_checkpoints/config-checkpoint.json
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"embed_model": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
|
| 3 |
+
"feature_dim": 384,
|
| 4 |
+
"ret_clip_percent": 25,
|
| 5 |
+
"winsorize": true,
|
| 6 |
+
"model_type": "sklearn_gradient_boosting_regressor",
|
| 7 |
+
"created_at": "2025-07-08T00:00:00+09:00"
|
| 8 |
+
}
|
.ipynb_checkpoints/predict_ret_next-checkpoint.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
"""
|
| 4 |
+
predict_ret_next.py
|
| 5 |
+
-------------------
|
| 6 |
+
使い方:
|
| 7 |
+
python predict_ret_next.py "業績予想の上方修正に関するお知らせ"
|
| 8 |
+
|
| 9 |
+
オプション:
|
| 10 |
+
  --model   joblib ファイルのパス (default: ./model.joblib)
|
| 11 |
+
--embed Sentence-Transformers 名 (default: paraphrase-multilingual-MiniLM-L12-v2)
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import argparse, joblib, os
|
| 15 |
+
from sentence_transformers import SentenceTransformer
|
| 16 |
+
|
| 17 |
+
def load_model(model_path):
    """Load the serialized regressor stored at ``model_path``.

    Args:
        model_path: Filesystem path of the joblib file to load.

    Returns:
        The object that ``joblib.load`` deserializes from the file.

    Raises:
        FileNotFoundError: If ``model_path`` is not an existing regular file
            (the path is missing, or it names a directory).
    """
    # isfile() rather than exists(): a directory would pass a plain existence
    # check and then fail inside joblib with a less helpful error.
    if not os.path.isfile(model_path):
        raise FileNotFoundError(f"model not found: {model_path}")
    # SECURITY: joblib.load unpickles the file, which can execute arbitrary
    # code; only point this at model files from a trusted source.
    return joblib.load(model_path)
|
| 21 |
+
|
| 22 |
+
def main():
    """Command-line entry point: print the predicted return for one title.

    Parses the positional ``title`` plus the optional ``--model`` and
    ``--embed`` arguments, loads the regressor and the sentence embedder,
    encodes the title and prints the regressor's prediction for it.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("title", help="開示タイトル(日本語 or 英語)")
    parser.add_argument(
        "--model",
        default="model.joblib",
        help="joblib file path (default: ./model.joblib)",
    )
    parser.add_argument(
        "--embed",
        default="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
        help="embedding model name or path",
    )
    options = parser.parse_args()

    print("▶ loading models …")
    # The regressor is loaded first, so a missing model file raises before
    # the embedding model is constructed.
    reg = load_model(options.model)
    if embedder_gpu():
        device = "cuda"
    else:
        device = "cpu"
    embedder = SentenceTransformer(options.embed, device=device)

    # encode() receives a one-element list; the single prediction is taken
    # from index 0 of the regressor output.
    features = embedder.encode([options.title])
    pred = reg.predict(features)[0]
    print(f"\n予測翌営業日リターン: {pred:.2f} %")
|
| 39 |
+
|
| 40 |
+
def embedder_gpu():
    """Report whether the embedding model should be placed on the GPU.

    Returns:
        True when torch is importable, CUDA is available, and the current
        device's compute capability, encoded as ``major * 10 + minor``, is
        at most 90 (i.e. up to sm_90). False otherwise, including when
        torch cannot be imported.
    """
    # Keep the try body to the one statement that can raise ImportError.
    try:
        import torch
    except ImportError:
        return False
    if not torch.cuda.is_available():
        return False
    # Named "minor" rather than "min" so the builtin is not shadowed.
    major, minor = torch.cuda.get_device_capability()
    # Original note: sm_120 and above are unsupported, so fall back to CPU.
    # NOTE(review): the <= 90 cutoff also rejects sm_100 -- confirm intent.
    return (major * 10 + minor) <= 90
|
| 49 |
+
|
| 50 |
+
if __name__ == "__main__":
|
| 51 |
+
main()
|
.ipynb_checkpoints/requirements-checkpoint.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
sentence-transformers>=2.7.0
|
| 2 |
+
scikit-learn>=1.5
|
| 3 |
+
joblib>=1.4
|
| 4 |
+
numpy>=1.26,<2.0
|
| 5 |
+
torch>=2.2
|
| 6 |
+
tqdm
|
README.md
CHANGED
|
@@ -1,3 +1,73 @@
|
|
| 1 |
---
|
| 2 |
license: apache-2.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
license: apache-2.0
|
| 3 |
+
tags:
|
| 4 |
+
- finance
|
| 5 |
+
- japanese
|
| 6 |
+
- stock-prediction
|
| 7 |
+
- sentence-transformers
|
| 8 |
+
- gradient-boosting
|
| 9 |
+
datasets:
|
| 10 |
+
- kabu-disclosures-v1
|
| 11 |
+
metrics:
|
| 12 |
+
- mean_absolute_error
|
| 13 |
+
- r2
|
| 14 |
+
model-index:
|
| 15 |
+
- name: jfinance-title2return-v1
|
| 16 |
+
results:
|
| 17 |
+
- task:
|
| 18 |
+
type: regression
|
| 19 |
+
name: Stock Return Regression
|
| 20 |
+
dataset:
|
| 21 |
+
name: JP_Disclosure_Titles_2024Q3-2025Q2
|
| 22 |
+
type: custom
|
| 23 |
+
metrics:
|
| 24 |
+
- type: mean_absolute_error
|
| 25 |
+
value: 2.94
|
| 26 |
+
- type: r2
|
| 27 |
+
value: -0.064
|
| 28 |
---
|
| 29 |
+
|
| 30 |
+
# jfinance-title2return-v1
|
| 31 |
+
|
| 32 |
+
日本株 **TDnet/EDINET 開示タイトル** から
|
| 33 |
+
**翌営業日リターン (`ret_next`, %)** を推定する Gradient Boosting Regressor モデルです。
|
| 34 |
+
タイトル文は [`sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2`](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2)
|
| 35 |
+
(384 次元)にエンコードし、±25 % Winsorize 済みリターンを回帰ターゲットに学習しました。
|
| 36 |
+
|
| 37 |
+
---
|
| 38 |
+
|
| 39 |
+
## Intended Uses & Limitations
|
| 40 |
+
|
| 41 |
+
| ✔ 推奨用途 | ✘ 非推奨用途 |
|
| 42 |
+
|-----------|-------------|
|
| 43 |
+
| IR イベントドリブンの速報スクリーニング | 裁量なしの完全自動売買 |
|
| 44 |
+
| 金融 NLP 研究・ベンチマーク | ETF/REIT での厳密な値動き予測 |
|
| 45 |
+
| 個人投資家の材料整理 | 終値 < 1 円の超低位株 |
|
| 46 |
+
|
| 47 |
+
> **注意**: 実際の投資判断に用いる際は必ず追加検証を行ってください。
|
| 48 |
+
|
| 49 |
+
---
|
| 50 |
+
|
| 51 |
+
## Quick inference (one-liner)
|
| 52 |
+
|
| 53 |
+
```python
|
| 54 |
+
from joblib import load
|
| 55 |
+
from sentence_transformers import SentenceTransformer
|
| 56 |
+
from huggingface_hub import hf_hub_download
|
| 57 |
+
|
| 58 |
+
# download & load model
|
| 59 |
+
reg = load(hf_hub_download("c299m/jfinance-title2return-v1", "model.joblib"))
|
| 60 |
+
embed = SentenceTransformer("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
|
| 61 |
+
|
| 62 |
+
title = "AIマッチングサービスβ版リリースのお知らせ"
|
| 63 |
+
pred = reg.predict(embed.encode([title]))[0]
|
| 64 |
+
print(f"Predicted next-day return: {pred:.2f} %")
```
|
| 65 |
+
|
| 66 |
+
### Run with the helper script
|
| 67 |
+
|
| 68 |
+
```bash
|
| 69 |
+
python predict_ret_next.py "NVIDIAと提携"
|
| 70 |
+
|
| 71 |
+
#▶ loading models …
|
| 72 |
+
#予測翌営業日リターン: 15.04 %
```
|
| 73 |
+
|
config.json
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"embed_model": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
|
| 3 |
+
"feature_dim": 384,
|
| 4 |
+
"ret_clip_percent": 25,
|
| 5 |
+
"winsorize": true,
|
| 6 |
+
"model_type": "sklearn_gradient_boosting_regressor",
|
| 7 |
+
"created_at": "2025-07-08T00:00:00+09:00"
|
| 8 |
+
}
|
model.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d6435d4b6e62f63db03daa8b55e43e8493cf7df2f0ad60542dc049594373d8ab
|
| 3 |
+
size 472851
|
predict_ret_next.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
"""
|
| 4 |
+
predict_ret_next.py
|
| 5 |
+
-------------------
|
| 6 |
+
使い方:
|
| 7 |
+
python predict_ret_next.py "業績予想の上方修正に関するお知らせ"
|
| 8 |
+
|
| 9 |
+
オプション:
|
| 10 |
+
  --model   joblib ファイルのパス (default: ./model.joblib)
|
| 11 |
+
--embed Sentence-Transformers 名 (default: paraphrase-multilingual-MiniLM-L12-v2)
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import argparse, joblib, os
|
| 15 |
+
from sentence_transformers import SentenceTransformer
|
| 16 |
+
|
| 17 |
+
def load_model(model_path):
    """Load the serialized regressor stored at ``model_path``.

    Args:
        model_path: Filesystem path of the joblib file to load.

    Returns:
        The object that ``joblib.load`` deserializes from the file.

    Raises:
        FileNotFoundError: If ``model_path`` is not an existing regular file
            (the path is missing, or it names a directory).
    """
    # isfile() rather than exists(): a directory would pass a plain existence
    # check and then fail inside joblib with a less helpful error.
    if not os.path.isfile(model_path):
        raise FileNotFoundError(f"model not found: {model_path}")
    # SECURITY: joblib.load unpickles the file, which can execute arbitrary
    # code; only point this at model files from a trusted source.
    return joblib.load(model_path)
|
| 21 |
+
|
| 22 |
+
def main():
    """Command-line entry point: print the predicted return for one title.

    Parses the positional ``title`` plus the optional ``--model`` and
    ``--embed`` arguments, loads the regressor and the sentence embedder,
    encodes the title and prints the regressor's prediction for it.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("title", help="開示タイトル(日本語 or 英語)")
    parser.add_argument(
        "--model",
        default="model.joblib",
        help="joblib file path (default: ./model.joblib)",
    )
    parser.add_argument(
        "--embed",
        default="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
        help="embedding model name or path",
    )
    options = parser.parse_args()

    print("▶ loading models …")
    # The regressor is loaded first, so a missing model file raises before
    # the embedding model is constructed.
    reg = load_model(options.model)
    if embedder_gpu():
        device = "cuda"
    else:
        device = "cpu"
    embedder = SentenceTransformer(options.embed, device=device)

    # encode() receives a one-element list; the single prediction is taken
    # from index 0 of the regressor output.
    features = embedder.encode([options.title])
    pred = reg.predict(features)[0]
    print(f"\n予測翌営業日リターン: {pred:.2f} %")
|
| 39 |
+
|
| 40 |
+
def embedder_gpu():
    """Report whether the embedding model should be placed on the GPU.

    Returns:
        True when torch is importable, CUDA is available, and the current
        device's compute capability, encoded as ``major * 10 + minor``, is
        at most 90 (i.e. up to sm_90). False otherwise, including when
        torch cannot be imported.
    """
    # Keep the try body to the one statement that can raise ImportError.
    try:
        import torch
    except ImportError:
        return False
    if not torch.cuda.is_available():
        return False
    # Named "minor" rather than "min" so the builtin is not shadowed.
    major, minor = torch.cuda.get_device_capability()
    # Original note: sm_120 and above are unsupported, so fall back to CPU.
    # NOTE(review): the <= 90 cutoff also rejects sm_100 -- confirm intent.
    return (major * 10 + minor) <= 90
|
| 49 |
+
|
| 50 |
+
if __name__ == "__main__":
|
| 51 |
+
main()
|
requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
sentence-transformers>=2.7.0
|
| 2 |
+
scikit-learn>=1.5
|
| 3 |
+
joblib>=1.4
|
| 4 |
+
numpy>=1.26,<2.0
|
| 5 |
+
torch>=2.2
|
| 6 |
+
tqdm
|