# med-notes/data_structure/add_id.py

import json
import uuid
import re

# Read the source JSON document (iterated entry by entry further down)
with open("structure.json", encoding="utf-8") as source:
    data = json.load(source)

# Output container: entries keyed by id, plus the ids in their original order
new_structure = dict(byId={}, allIds=[])


# Optional whitespace on either side of a semicolon.
_SEMICOLON_SPACING = re.compile(r"\s*;\s*")
# A hyphen attached to the preceding word character and followed by whitespace.
_HYPHEN_TRAILING_SPACE = re.compile(r"(\w)-\s+")


# Function to fix punctuation in titles
def fix_title_punctuation(title: str) -> str:
    """Normalize spacing around semicolons and after word-attached hyphens.

    Two rewrites are applied, in this order:

    1. Every semicolon loses the whitespace in front of it and is followed
       by exactly one space. A semicolon that ends the title gets no
       trailing space, so the result never gains trailing whitespace.
    2. For a hyphen that directly follows a word character, the run of
       whitespace after it is collapsed to a single space. Hyphens that are
       preceded by whitespace (e.g. ``"a - b"``) or not followed by
       whitespace (e.g. ``"well-known"``) are left untouched.

    Args:
        title: The raw title text.

    Returns:
        The title with the spacing rules above applied.
    """
    fixed_title = _SEMICOLON_SPACING.sub("; ", title)

    # The substitution swallows all whitespace after each semicolon, so a
    # result ending in "; " can only come from a semicolon that closed the
    # title; drop the space that was just inserted after it.
    if fixed_title.endswith("; "):
        fixed_title = fixed_title[:-1]

    fixed_title = _HYPHEN_TRAILING_SPACE.sub(r"\1- ", fixed_title)

    return fixed_title


# Walk the loaded entries, decorating a shallow copy of each one with an id
# and its position, then register it in the new structure
last_position = len(data) - 1
by_id = new_structure["byId"]
ordered_ids = new_structure["allIds"]

for position, entry in enumerate(data):
    # Fresh random identifier for this entry
    topic_id = str(uuid.uuid4())

    # Copy first so the loaded entry itself is not modified
    topic = entry.copy()
    topic.update(
        {
            "id": topic_id,
            "index": position,
            "isFirst": position == 0,
            "isLast": position == last_position,
        }
    )

    # NOTE(review): str.replace swaps every "pdf" substring in the name, not
    # only a trailing extension; confirm names never contain "pdf" elsewhere.
    topic["files"] = [name.replace("pdf", "html") for name in topic["files"]]

    # Titles are optional; clean up punctuation only when one is present
    if "title" in topic:
        topic["title"] = fix_title_punctuation(topic["title"])

    # Register the entry under its id and remember the insertion order
    by_id[topic_id] = topic
    ordered_ids.append(topic_id)

# Write the assembled structure out as topics.json
with open("topics.json", "w", encoding="utf-8") as output:
    json.dump(new_structure, output, ensure_ascii=False, indent=4)

print("topics.json created successfully with fixed punctuation")