import yaml
import requests
import os
import re
from datetime import datetime
from typing import Dict, List, Any

def fetch_conference_files() -> List[Dict[str, Any]]:
    """Fetch all conference YAML files from the ccfddl repository."""
    # First get the directory listing from the GitHub API
    api_url = "https://api.github.com/repos/ccfddl/ccf-deadlines/contents/conference/AI"
    response = requests.get(api_url)
    response.raise_for_status()  # fail fast on HTTP errors (e.g. rate limiting)
    files = response.json()

    conferences = []
    for file in files:
        if file["name"].endswith(".yml"):
            yaml_content = requests.get(file["download_url"]).text
            conf_data = yaml.safe_load(yaml_content)
            # Each file contains a list with a single conference entry
            if isinstance(conf_data, list) and len(conf_data) > 0:
                conferences.append(conf_data[0])

    return conferences
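
# Illustrative sketch (not taken from the repository) of the ccfddl entry shape
# this script assumes. The field names match those accessed below; the values
# and the conference itself are hypothetical:
#
# {
#     "title": "EXAMPLECONF",
#     "description": "Example Conference on Examples",
#     "rank": {"ccf": "A"},
#     "confs": [
#         {
#             "year": 2025,
#             "id": "exampleconf25",
#             "link": "https://example.org",
#             "timezone": "AoE",
#             "date": "May 19-23, 2025",
#             "place": "Vancouver, Canada",
#             "timeline": [
#                 {"deadline": "2025-01-15 23:59", "abstract_deadline": "2025-01-08 23:59"}
#             ],
#         }
#     ],
# }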

def parse_date_range(date_str: str, year: str) -> tuple[str, str]:
    """Parse various date formats and return start and end dates."""
    # Remove the year if it appears at the end of the string
    date_str = date_str.replace(f", {year}", "")

    # Handle various date formats
    try:
        # Split into start and end dates
        if " - " in date_str:
            start, end = date_str.split(" - ")
        elif "-" in date_str:
            start, end = date_str.split("-")
        else:
            # Single date format like "May 19, 2025"
            start = end = date_str

        # Map month abbreviations to full month names
        month_map = {
            "Sept": "September",
            "Jan": "January",
            "Feb": "February",
            "Mar": "March",
            "Apr": "April",
            "Jun": "June",
            "Jul": "July",
            "Aug": "August",
            "Sep": "September",
            "Oct": "October",
            "Nov": "November",
            "Dec": "December",
        }

        # Set of all month names (full and abbreviated); "May" has no separate
        # abbreviation, so add it explicitly
        all_months = set(month_map.keys()) | set(month_map.values()) | {"May"}

        # Handle cases like "April 29-May 4"
        has_month = any(month in end for month in all_months)
        if not has_month:
            # End is just a day number, so reuse the start date's month
            start_parts = start.split()
            if len(start_parts) >= 1:
                end = f"{start_parts[0]} {end.strip()}"

        # Expand month abbreviations as whole words only, so that full names
        # such as "March" are not mangled by the "Mar" -> "March" rule
        def expand_months(s: str) -> str:
            for abbr, full in month_map.items():
                s = re.sub(rf"\b{abbr}\b", full, s)
            return s

        start = expand_months(start)
        end = expand_months(end)

        # Clean up any extra spaces
        start = " ".join(start.split())
        end = " ".join(end.split())

        # Parse start and end dates
        start_date = datetime.strptime(f"{start}, {year}", "%B %d, %Y")
        end_date = datetime.strptime(f"{end}, {year}", "%B %d, %Y")

        return start_date.strftime("%Y-%m-%d"), end_date.strftime("%Y-%m-%d")
    except Exception as e:
        raise ValueError(f"Could not parse date: {date_str} ({e})")
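
# Example usage (illustrative inputs): with the handling above,
# parse_date_range("April 29-May 4", "2025") returns ("2025-04-29", "2025-05-04"),
# and a single date such as "May 19, 2025" returns the same value for both
# start and end, ("2025-05-19", "2025-05-19").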

def transform_conference_data(
    conferences: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
    """Transform ccfddl format to our format."""
    transformed = []
    current_year = datetime.now().year

    for conf in conferences:
        # Get the most recent or upcoming conference instance
        recent_conf = None
        if "confs" in conf:
            for instance in conf["confs"]:
                if instance["year"] >= current_year:
                    recent_conf = instance
                    break

        if not recent_conf:
            continue

        # Transform to our format
        transformed_conf = {
            "title": conf.get("title", ""),
            "year": recent_conf["year"],
            "id": recent_conf["id"],
            "full_name": conf.get("description", ""),
            "link": recent_conf.get("link", ""),
            "deadline": recent_conf.get("timeline", [{}])[0].get("deadline", ""),
            "timezone": recent_conf.get("timezone", ""),
            "date": recent_conf.get("date", ""),
            "tags": [],  # We'll need to maintain a mapping for tags
        }

        # Handle city and country fields instead of place
        place = recent_conf.get("place", "")
        if place:
            # Try to split the place into city and country if it contains a comma
            if "," in place:
                city, country = place.split(",", 1)
                transformed_conf["city"] = city.strip()
                transformed_conf["country"] = country.strip()
            else:
                # If we can't split, just set the country
                transformed_conf["country"] = place.strip()

        # Add optional fields
        timeline = recent_conf.get("timeline", [{}])[0]
        if "abstract_deadline" in timeline:
            transformed_conf["abstract_deadline"] = timeline["abstract_deadline"]

        # Parse the date range for start/end
        try:
            if transformed_conf["date"]:
                start_date, end_date = parse_date_range(
                    transformed_conf["date"], str(transformed_conf["year"])
                )
                transformed_conf["start"] = start_date
                transformed_conf["end"] = end_date
        except Exception as e:
            print(f"Warning: Could not parse date for {transformed_conf['title']}: {e}")

        # Add rankings as a separate field
        if "rank" in conf:
            rankings = []
            for rank_type, rank_value in conf["rank"].items():
                rankings.append(f"{rank_type.upper()}: {rank_value}")
            if rankings:
                transformed_conf["rankings"] = ", ".join(rankings)

        transformed.append(transformed_conf)

    return transformed
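
# Illustrative sketch of a transformed record produced above (hypothetical
# values; only the field names are taken from the code):
#
# {
#     "title": "EXAMPLECONF",
#     "year": 2025,
#     "id": "exampleconf25",
#     "full_name": "Example Conference on Examples",
#     "link": "https://example.org",
#     "deadline": "2025-01-15 23:59",
#     "timezone": "AoE",
#     "date": "May 19-23, 2025",
#     "tags": [],
#     "city": "Vancouver",
#     "country": "Canada",
#     "abstract_deadline": "2025-01-08 23:59",
#     "start": "2025-05-19",
#     "end": "2025-05-23",
#     "rankings": "CCF: A",
# }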

def load_all_current_conferences() -> Dict[str, List[Dict[str, Any]]]:
    """Load all current conferences from individual files."""
    conferences_dir = "src/data/conferences"
    conference_groups = {}

    if not os.path.exists(conferences_dir):
        return {}

    for filename in os.listdir(conferences_dir):
        if filename.endswith(".yml"):
            filepath = os.path.join(conferences_dir, filename)
            with open(filepath, "r") as f:
                conferences = yaml.safe_load(f)
                if conferences:
                    # Extract the conference title from the first entry
                    title = conferences[0]["title"]
                    conference_groups[title] = conferences

    return conference_groups

def create_filename_from_title(title: str) -> str:
    """Create a filename-safe version of the conference title."""
    filename = re.sub(r"[^a-zA-Z0-9\s&()-]", "", title.lower())
    filename = re.sub(r"\s+", "_", filename)
    filename = filename.replace("&", "and")
    filename = filename.strip("_")
    return filename
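
# Example (hypothetical titles): create_filename_from_title("EXAMPLECONF")
# gives "exampleconf", and "Example & Demo Conf" gives "example_and_demo_conf".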

def update_conference_loader():
    """Update the conference loader file with all current conferences."""
    conferences_dir = "src/data/conferences"
    loader_path = "src/utils/conferenceLoader.ts"

    # Get all conference files
    conference_files = []
    if os.path.exists(conferences_dir):
        for filename in sorted(os.listdir(conferences_dir)):
            if filename.endswith(".yml"):
                conference_files.append(filename)

    # Generate import statements
    imports = []
    variable_names = []
    for filename in conference_files:
        # Create a variable name from the filename
        var_name = filename.replace(".yml", "").replace("-", "_") + "Data"
        variable_names.append(var_name)
        imports.append(f"import {var_name} from '@/data/conferences/{filename}';")

    # Generate the loader file content
    loader_content = f"""import {{ Conference }} from '@/types/conference';

// Import all conference YAML files
{chr(10).join(imports)}

// Combine all conference data into a single array
const allConferencesData: Conference[] = [
{chr(10).join(f"  ...{var_name}," for var_name in variable_names)}
];

export default allConferencesData;"""

    # Write the loader file
    with open(loader_path, "w") as f:
        f.write(loader_content)

    print(f"Updated conference loader with {len(conference_files)} conference files")

def main():
    try:
        # Load current conferences from individual files
        current_conference_groups = load_all_current_conferences()

        # Fetch and transform new data
        new_conferences = fetch_conference_files()
        if not new_conferences:
            print("Warning: No conferences fetched from ccfddl")
            return

        transformed_conferences = transform_conference_data(new_conferences)
        if not transformed_conferences:
            print("Warning: No conferences transformed")
            return

        # Create the conferences directory if it doesn't exist
        conferences_dir = "src/data/conferences"
        os.makedirs(conferences_dir, exist_ok=True)

        # Group new conferences by title
        new_conference_groups = {}
        for conf in transformed_conferences:
            title = conf["title"]
            if title not in new_conference_groups:
                new_conference_groups[title] = []
            new_conference_groups[title].append(conf)

        # Update each conference group
        updated_count = 0
        for title, new_confs in new_conference_groups.items():
            filename = create_filename_from_title(title) + ".yml"
            filepath = os.path.join(conferences_dir, filename)

            # Get current conferences for this title
            current_confs = current_conference_groups.get(title, [])
            current_conf_dict = {conf["id"]: conf for conf in current_confs}

            # Update or add new conferences
            for new_conf in new_confs:
                if new_conf["id"] in current_conf_dict:
                    # Update the existing conference while preserving fields
                    curr_conf = current_conf_dict[new_conf["id"]]

                    # Preserve existing fields
                    preserved_fields = [
                        "tags",
                        "venue",
                        "hindex",
                        "submission_deadline",
                        "timezone_submission",
                        "rebuttal_period_start",
                        "rebuttal_period_end",
                        "final_decision_date",
                        "review_release_date",
                        "commitment_deadline",
                        "start",
                        "end",
                        "note",
                        "city",
                        "country",
                        "deadlines",
                    ]
                    for field in preserved_fields:
                        if field in curr_conf:
                            new_conf[field] = curr_conf[field]

                    # Preserve existing rankings if available
                    if "rankings" in curr_conf:
                        new_conf["rankings"] = curr_conf["rankings"]

                    current_conf_dict[new_conf["id"]] = new_conf
                else:
                    # Add the new conference
                    current_conf_dict[new_conf["id"]] = new_conf

            # Convert back to a list and sort by year
            all_confs = list(current_conf_dict.values())
            all_confs.sort(key=lambda x: x.get("year", 9999))

            # Write to the individual file
            with open(filepath, "w") as f:
                yaml.dump(
                    all_confs,
                    f,
                    default_flow_style=False,
                    sort_keys=False,
                    allow_unicode=True,
                )

            updated_count += 1
            print(f"Updated {filename} with {len(all_confs)} entries")

        # Update the conference loader
        update_conference_loader()
        print(f"Successfully updated {updated_count} conference files")

    except Exception as e:
        print(f"Error: {e}")
        raise

if __name__ == "__main__":
    main()