Spaces:

kasimali
/

finalxls-r-mms

Running

App Files Files Community

kasimali committed on Oct 8

Commit

50ce094

verified ·

1 Parent(s): b807c61

Upload folder using huggingface_hub

Browse files

Files changed (3) hide show

README.md +3 -6
app.py +211 -0
requirements.txt +4 -0

README.md CHANGED Viewed

@@ -1,10 +1,7 @@
 ---
-title: Finalxls R Mms
-emoji: ⚡
-colorFrom: green
-colorTo: indigo
 sdk: static
-pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: FINALXLS-R-MMS
+emoji: 🚀
 sdk: static
 ---
+# FINALXLS-R-MMS

app.py ADDED Viewed

	@@ -0,0 +1,211 @@

+# FINALXLS-R-MMS
+# ============================================================================
+# CELL 1: SETUP AND INSTALLATION
+# ============================================================================
+import os
+import warnings
+warnings.filterwarnings('ignore')
+print("🚀 MMS Language Identification Test (Final Corrected Version)")
+print("=" * 60)
+# Mount Google Drive
+from google.colab import drive
+# Install and update necessary packages
+print("📦 Installing and updating packages...")
+print("✅ Setup complete! Please restart the runtime now to apply updates.")
+# ============================================================================
+# CELL 2: MODEL LOADING AND MAPPINGS (CORRECTED)
+# ============================================================================
+import torch
+import librosa
+import pandas as pd
+import numpy as np
+from datetime import datetime
+from transformers import Wav2Vec2FeatureExtractor, AutoModelForAudioClassification
+from sklearn.metrics import accuracy_score, classification_report
+# --- Ground-truth mapping: dataset folder name -> language code ---
+# Keys are folder names (compared after lower-casing by find_audio_files);
+# values are the ground-truth codes written to the results. Keys are a mix of
+# two- and three-letter names, and Punjabi's ground-truth code is 'pa'.
+CUSTOM_FOLDER_MAPPING = {
+    'as': 'asm', 'bn': 'ben', 'br': 'brx', 'doi': 'dgo', 'en': 'eng',
+    'gu': 'guj', 'hi': 'hin', 'kn': 'kan', 'kok': 'kok', 'ks': 'kas',
+    'mai': 'mai', 'ml': 'mal', 'mni': 'mni', 'mr': 'mar', 'ne': 'nep',
+    'or': 'ory', 'pa': 'pa', 'sa': 'san', 'sat': 'sat', 'sd': 'snd',
+    'ta': 'tam', 'te': 'tel', 'ur': 'urd'
+}
+# --- Normalization mapping: predicted label -> ground-truth code ---
+# Applied to every predicted label before it is compared with the ground
+# truth; labels missing from this map are reported as 'unknown'.
+NORMALIZATION_MAP = {
+    'asm': 'asm', 'ben': 'ben', 'brx': 'brx', 'dgo': 'dgo', 'eng': 'eng',
+    'guj': 'guj', 'hin': 'hin', 'kan': 'kan', 'kok': 'kok', 'kas': 'kas',
+    'mai': 'mai', 'mal': 'mal', 'mni': 'mni', 'mar': 'mar', 'ory': 'ory',
+    'pan': 'pa',  # Punjabi: the ground truth uses 'pa', not 'pan'
+    'san': 'san', 'sat': 'sat', 'snd': 'snd', 'tam': 'tam', 'tel': 'tel', 'urd': 'urd',
+    'npi': 'nep'  # Nepali: the ground truth uses 'nep', not 'npi'
+}
+# Ground-truth code -> English language name, used to label rows of the report.
+ISO_TO_FULL_NAME = {
+    'asm': 'Assamese', 'ben': 'Bengali', 'brx': 'Bodo', 'dgo': 'Dogri', 'eng': 'English',
+    'guj': 'Gujarati', 'hin': 'Hindi', 'kan': 'Kannada', 'kok': 'Konkani', 'kas': 'Kashmiri',
+    'mai': 'Maithili', 'mal': 'Malayalam', 'mni': 'Manipuri', 'mar': 'Marathi', 'nep': 'Nepali',
+    'ory': 'Odia', 'pa': 'Punjabi', 'san': 'Sanskrit', 'sat': 'Santali', 'snd': 'Sindhi',
+    'tam': 'Tamil', 'tel': 'Telugu', 'urd': 'Urdu'
+}
+# --- Paths and model loading ---
+# Input audio is read from AUDIO_FOLDER and result files are written to
+# RESULTS_FOLDER (presumably a mounted Google Drive - confirm).
+AUDIO_FOLDER = "/content/drive/MyDrive/Audio_files"
+RESULTS_FOLDER = "/content/drive/MyDrive/mms_lid_results"
+# Create the results folder up front; exist_ok avoids an error if it already exists.
+os.makedirs(RESULTS_FOLDER, exist_ok=True)
+# Use the GPU when CUDA is available, otherwise fall back to the CPU.
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+print(f"🔧 Device: {device}")
+MODEL_NAME = "facebook/mms-lid-256"
+# Load the feature extractor and the audio-classification model for
+# MODEL_NAME, then move the model to the selected device.
+feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(MODEL_NAME)
+model = AutoModelForAudioClassification.from_pretrained(MODEL_NAME).to(device)
+# The model is only used for inference, so switch it to evaluation mode.
+model.eval()
+print(f"✅ MMS LID model and feature extractor loaded successfully: {MODEL_NAME}")
+# ============================================================================
+# CELL 3: AUDIO PROCESSING AND PREDICTION (CORRECTED)
+# ============================================================================
+def load_audio_raw(file_path):
+    try:
+        audio, sr = librosa.load(file_path, sr=16000, mono=True)
+        duration = len(audio) / 16000
+        return audio, duration
+    except Exception as e:
+        print(f"Error loading {file_path}: {e}")
+        return None, 0
+def predict_language_mms_top5(audio_array):
+    """
+    Predicts the top 5 languages, but only from the list of target Indian languages.
+    """
+    try:
+        inputs = feature_extractor(audio_array, sampling_rate=16000, return_tensors="pt")
+        inputs = {k: v.to(device) for k, v in inputs.items()}
+        with torch.no_grad():
+            outputs = model(**inputs)
+        logits = outputs.logits
+        probabilities = torch.softmax(logits, dim=-1)[0]
+        # --- Whitelist Logic ---
+        target_lang_codes = list(CUSTOM_FOLDER_MAPPING.values())
+        target_indices = [model.config.label2id[lang] for lang in target_lang_codes if lang in model.config.label2id]
+        # Create a mask to only consider target languages
+        mask = torch.zeros_like(probabilities)
+        mask[target_indices] = 1
+        # Apply mask and re-normalize probabilities
+        masked_probs = probabilities * mask
+        if masked_probs.sum() > 0:
+            renormalized_probs = masked_probs / masked_probs.sum()
+        else:
+            renormalized_probs = masked_probs # Avoid division by zero
+        # Get Top-5 predictions from the whitelisted languages
+        top5_probs, top5_indices = torch.topk(renormalized_probs, 5)
+        top5_lang_codes = [model.config.id2label[i.item()] for i in top5_indices]
+        return top5_lang_codes, top5_probs.cpu().numpy()
+    except Exception as e:
+        return ["error"], [0.0]
+def find_audio_files(base_path):
+    audio_files = []
+    for root, _, files in os.walk(base_path):
+        folder_code = os.path.basename(root).lower()
+        if folder_code in CUSTOM_FOLDER_MAPPING:
+            ground_truth_iso = CUSTOM_FOLDER_MAPPING[folder_code]
+            for file in files:
+                if file.lower().endswith(('.wav', '.mp3', '.m4a', '.flac', '.ogg')):
+                    audio_files.append({
+                        "file_path": os.path.join(root, file),
+                        "filename": file,
+                        "ground_truth": ground_truth_iso
+                    })
+    return audio_files
+print("✅ Corrected prediction functions are ready!")
+# ============================================================================
+# CELL 4: PROCESS ALL FILES AND GENERATE REPORT (CORRECTED)
+# ============================================================================
+def run_full_analysis_corrected():
+    print("🚀 Processing FULL dataset with Corrected Top-5 Logic...")
+    audio_files = find_audio_files(AUDIO_FOLDER)
+    if not audio_files:
+        print("❌ No audio files found.")
+        return
+    results = []
+    print(f"🔄 Processing {len(audio_files)} files...")
+    for i, file_info in enumerate(audio_files):
+        if (i + 1) % 100 == 0:
+            print(f"Progress: {i+1}/{len(audio_files)}")
+        audio, duration = load_audio_raw(str(file_info['file_path']))
+        if audio is None:
+            results.append({**file_info, 'predicted_language': 'load_error', 'top5_predictions': [], 'confidence': 0.0, 'duration': 0.0})
+        else:
+            top5_langs, top5_probs = predict_language_mms_top5(audio)
+            # Apply normalization to all predictions
+            normalized_top5 = [NORMALIZATION_MAP.get(lang, 'unknown') for lang in top5_langs]
+            results.append({
+                **file_info,
+                'predicted_language': normalized_top5[0], # Top-1 prediction
+                'confidence': top5_probs[0],
+                'duration': duration,
+                'is_short_file': duration < 3.0,
+                'top5_predictions': normalized_top5
+            })
+    results_df = pd.DataFrame(results)
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    csv_path = f"{RESULTS_FOLDER}/mms_corrected_top5_results_{timestamp}.csv"
+    results_df.to_csv(csv_path, index=False)
+    print(f"\n✅ Processing complete! Results saved to: {csv_path}")
+    # --- Final Detailed Analysis ---
+    print("\n" + "=" * 60)
+    print("📊 MMS LID MODEL - FINAL CORRECTED ANALYSIS")
+    print("=" * 60)
+    valid_df = results_df[results_df['predicted_language'] != 'load_error'].copy()
+    # Calculate Top-1 Accuracy
+    top1_accuracy = accuracy_score(valid_df['ground_truth'], valid_df['predicted_language'])
+    # Calculate Top-5 Accuracy
+    valid_df['is_top5_correct'] = valid_df.apply(lambda row: row['ground_truth'] in row['top5_predictions'], axis=1)
+    top5_accuracy = valid_df['is_top5_correct'].mean()
+    print(f"\n🎯 OVERALL TOP-1 ACCURACY: {top1_accuracy:.2%}")
+    print(f"🎯 OVERALL TOP-5 ACCURACY: {top5_accuracy:.2%}")
+    print(f"\n📋 LANGUAGE-WISE ACCURACY:")
+    report_df = pd.DataFrame(classification_report(valid_df['ground_truth'], valid_df['predicted_language'], output_dict=True, zero_division=0)).transpose()
+    report_df['Language'] = report_df.index.map(ISO_TO_FULL_NAME)
+    print(report_df[['Language', 'precision', 'recall', 'f1-score', 'support']])
+# Run the final, corrected analysis
+run_full_analysis_corrected()

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+numpy
+pandas
+torch
+transformers