import yaml import requests import os import re from datetime import datetime from typing import Dict, List, Any def fetch_conference_files() -> List[Dict[str, Any]]: """Fetch all conference YAML files from ccfddl repository.""" # First get the directory listing from GitHub API api_url = "https://api.github.com/repos/ccfddl/ccf-deadlines/contents/conference/AI" response = requests.get(api_url) files = response.json() conferences = [] for file in files: if file["name"].endswith(".yml"): yaml_content = requests.get(file["download_url"]).text conf_data = yaml.safe_load(yaml_content) # The data is a list with a single item if isinstance(conf_data, list) and len(conf_data) > 0: conferences.append(conf_data[0]) return conferences def parse_date_range(date_str: str, year: str) -> tuple[str, str]: """Parse various date formats and return start and end dates.""" # Remove the year if it appears at the end of the string date_str = date_str.replace(f", {year}", "") # Handle various date formats try: # Split into start and end dates if " - " in date_str: start, end = date_str.split(" - ") elif "-" in date_str: start, end = date_str.split("-") else: # For single date format like "May 19, 2025" start = end = date_str # Clean up month abbreviations month_map = { "Sept": "September", # Handle Sept before Sep "Jan": "January", "Feb": "February", "Mar": "March", "Apr": "April", "Jun": "June", "Jul": "July", "Aug": "August", "Sep": "September", "Oct": "October", "Nov": "November", "Dec": "December", } # Create a set of all month names (full and abbreviated) all_months = set(month_map.keys()) | set(month_map.values()) # Handle cases like "April 29-May 4" has_month = any(month in end for month in all_months) if not has_month: # End is just a day number, use start's month start_parts = start.split() if len(start_parts) >= 1: end = f"{start_parts[0]} {end.strip()}" # Replace month abbreviations for abbr, full in month_map.items(): start = start.replace(abbr, full) end = end.replace(abbr, full) # Clean up any extra spaces start = " ".join(start.split()) end = " ".join(end.split()) # Parse start date start_date = datetime.strptime(f"{start}, {year}", "%B %d, %Y") # Parse end date end_date = datetime.strptime(f"{end}, {year}", "%B %d, %Y") return start_date.strftime("%Y-%m-%d"), end_date.strftime("%Y-%m-%d") except Exception as e: raise ValueError(f"Could not parse date: {date_str} ({e})") def transform_conference_data( conferences: List[Dict[str, Any]], ) -> List[Dict[str, Any]]: """Transform ccfddl format to our format.""" transformed = [] current_year = datetime.now().year for conf in conferences: # Get the most recent or upcoming conference instance recent_conf = None if "confs" in conf: for instance in conf["confs"]: if instance["year"] >= current_year: recent_conf = instance break if not recent_conf: continue # Transform to our format transformed_conf = { "title": conf.get("title", ""), "year": recent_conf["year"], "id": recent_conf["id"], "full_name": conf.get("description", ""), "link": recent_conf.get("link", ""), "deadline": recent_conf.get("timeline", [{}])[0].get("deadline", ""), "timezone": recent_conf.get("timezone", ""), "date": recent_conf.get("date", ""), "tags": [], # We'll need to maintain a mapping for tags } # Handle city and country fields instead of place place = recent_conf.get("place", "") if place: # Try to parse the place into city and country if it contains a comma if "," in place: city, country = place.split(",", 1) transformed_conf["city"] = city.strip() transformed_conf["country"] = country.strip() else: # If we can't parse, just set the country transformed_conf["country"] = place.strip() # Add optional fields timeline = recent_conf.get("timeline", [{}])[0] if "abstract_deadline" in timeline: transformed_conf["abstract_deadline"] = timeline["abstract_deadline"] # Parse date range for start/end try: if transformed_conf["date"]: start_date, end_date = parse_date_range( transformed_conf["date"], str(transformed_conf["year"]) ) transformed_conf["start"] = start_date transformed_conf["end"] = end_date except Exception as e: print(f"Warning: Could not parse date for {transformed_conf['title']}: {e}") # Add rankings as separate field if "rank" in conf: rankings = [] for rank_type, rank_value in conf["rank"].items(): rankings.append(f"{rank_type.upper()}: {rank_value}") if rankings: transformed_conf["rankings"] = ", ".join(rankings) transformed.append(transformed_conf) return transformed def load_all_current_conferences() -> Dict[str, List[Dict[str, Any]]]: """Load all current conferences from individual files.""" conferences_dir = "src/data/conferences" conference_groups = {} if not os.path.exists(conferences_dir): return {} for filename in os.listdir(conferences_dir): if filename.endswith(".yml"): filepath = os.path.join(conferences_dir, filename) with open(filepath, "r") as f: conferences = yaml.safe_load(f) if conferences: # Extract conference title from the first entry title = conferences[0]["title"] conference_groups[title] = conferences return conference_groups def create_filename_from_title(title: str) -> str: """Create a filename-safe version of the conference title.""" filename = re.sub(r"[^a-zA-Z0-9\s&()-]", "", title.lower()) filename = re.sub(r"\s+", "_", filename) filename = filename.replace("&", "and") filename = filename.strip("_") return filename def update_conference_loader(): """Update the conference loader file with all current conferences.""" conferences_dir = "src/data/conferences" loader_path = "src/utils/conferenceLoader.ts" # Get all conference files conference_files = [] if os.path.exists(conferences_dir): for filename in sorted(os.listdir(conferences_dir)): if filename.endswith(".yml"): conference_files.append(filename) # Generate import statements imports = [] variable_names = [] for filename in conference_files: # Create variable name from filename var_name = filename.replace(".yml", "").replace("-", "_") + "Data" variable_names.append(var_name) imports.append(f"import {var_name} from '@/data/conferences/{filename}';") # Generate the loader file content loader_content = f"""import {{ Conference }} from '@/types/conference'; // Import all conference YAML files {chr(10).join(imports)} // Combine all conference data into a single array const allConferencesData: Conference[] = [ {chr(10).join(f" ...{var_name}," for var_name in variable_names)} ]; export default allConferencesData;""" # Write the loader file with open(loader_path, "w") as f: f.write(loader_content) print(f"Updated conference loader with {len(conference_files)} conference files") def main(): try: # Load current conferences from individual files current_conference_groups = load_all_current_conferences() # Fetch and transform new data new_conferences = fetch_conference_files() if not new_conferences: print("Warning: No conferences fetched from ccfddl") return transformed_conferences = transform_conference_data(new_conferences) if not transformed_conferences: print("Warning: No conferences transformed") return # Create conferences directory if it doesn't exist conferences_dir = "src/data/conferences" os.makedirs(conferences_dir, exist_ok=True) # Group new conferences by title new_conference_groups = {} for conf in transformed_conferences: title = conf["title"] if title not in new_conference_groups: new_conference_groups[title] = [] new_conference_groups[title].append(conf) # Update each conference group updated_count = 0 for title, new_confs in new_conference_groups.items(): filename = create_filename_from_title(title) + ".yml" filepath = os.path.join(conferences_dir, filename) # Get current conferences for this title current_confs = current_conference_groups.get(title, []) current_conf_dict = {conf["id"]: conf for conf in current_confs} # Update or add new conferences for new_conf in new_confs: if new_conf["id"] in current_conf_dict: # Update existing conference while preserving fields curr_conf = current_conf_dict[new_conf["id"]] # Preserve existing fields preserved_fields = [ "tags", "venue", "hindex", "submission_deadline", "timezone_submission", "rebuttal_period_start", "rebuttal_period_end", "final_decision_date", "review_release_date", "commitment_deadline", "start", "end", "note", "city", "country", "deadlines", ] for field in preserved_fields: if field in curr_conf: new_conf[field] = curr_conf[field] # Preserve existing rankings if available if "rankings" in curr_conf: new_conf["rankings"] = curr_conf["rankings"] current_conf_dict[new_conf["id"]] = new_conf else: # Add new conference current_conf_dict[new_conf["id"]] = new_conf # Convert back to list and sort by year all_confs = list(current_conf_dict.values()) all_confs.sort(key=lambda x: x.get("year", 9999)) # Write to individual file with open(filepath, "w") as f: yaml.dump( all_confs, f, default_flow_style=False, sort_keys=False, allow_unicode=True, ) updated_count += 1 print(f"Updated {filename} with {len(all_confs)} entries") # Update the conference loader update_conference_loader() print(f"Successfully updated {updated_count} conference files") except Exception as e: print(f"Error: {e}") raise if __name__ == "__main__": main()