med-notes/data_structure/add_id.py
2025-05-09 05:30:08 +02:00

50 lines
1.5 KiB
Python

import json
import uuid
import re
# Load the original JSON file. The processing loop enumerates `data` and
# copies each entry, so the file is expected to hold an array of objects.
with open("structure.json", "r", encoding="utf-8") as file:
    data = json.load(file)

# Create the new structure: "byId" maps each generated id to its item and
# "allIds" keeps the ids in the order the items were read.
new_structure = {"byId": {}, "allIds": []}
def fix_title_punctuation(title):
    """Return *title* with normalized spacing around semicolons and hyphens.

    Semicolons: any whitespace before a semicolon is removed and exactly one
    space follows it, except when the semicolon ends the title, in which case
    no trailing space is added.

    Hyphens: when a hyphen directly follows a word character and is followed
    by whitespace, that whitespace is collapsed to a single space
    ("pre-   op" -> "pre- op"). Hyphens without trailing whitespace
    ("pre-op") or preceded by a space ("a - b") are left untouched.

    Values that are not strings (for example a JSON ``null`` title) are
    returned unchanged instead of raising ``TypeError``.
    """
    if not isinstance(title, str):
        return title

    def _semicolon(match):
        # The pattern greedily swallows trailing whitespace, so a match that
        # reaches the end of the title is a terminal semicolon: emit it bare
        # rather than leaving a dangling space at the end of the title.
        return ";" if match.end() == len(title) else "; "

    # Fix spaces around semicolons (none before, one after)
    fixed_title = re.sub(r"\s*;\s*", _semicolon, title)
    # Collapse whitespace after a hyphen that is attached to a word
    fixed_title = re.sub(r"(\w)-\s+", r"\1- ", fixed_title)
    return fixed_title
# Process each item in the original array, preserving input order
last_index = len(data) - 1
for i, item in enumerate(data):
    # Work on a shallow copy so the entry loaded from structure.json is not
    # modified when keys are added or replaced below
    new_item = item.copy()

    # Generate a new UUID; it is stored on the item and used as its key
    new_id = str(uuid.uuid4())
    new_item["id"] = new_id

    # Positional metadata derived from the item's place in the input array
    new_item["index"] = i
    new_item["isFirst"] = i == 0
    new_item["isLast"] = i == last_index

    # Point file references at html instead of pdf. Guarded the same way as
    # "title" so an item without a "files" key is passed through instead of
    # raising KeyError.
    # NOTE(review): str.replace swaps every "pdf" substring, not only a
    # trailing extension -- confirm no file name contains "pdf" elsewhere.
    if "files" in new_item:
        new_item["files"] = [
            name.replace("pdf", "html") for name in new_item["files"]
        ]

    # Fix the title punctuation
    if "title" in new_item:
        new_item["title"] = fix_title_punctuation(new_item["title"])

    # Add to the new structure
    new_structure["byId"][new_id] = new_item
    new_structure["allIds"].append(new_id)

# Save the new structure to topics.json
with open("topics.json", "w", encoding="utf-8") as file:
    json.dump(new_structure, file, ensure_ascii=False, indent=4)

print("topics.json created successfully with fixed punctuation")