# 50 lines, 1.5 KiB, Python
import json
|
|
import uuid
|
|
import re
|
|
|
|
# Load the original JSON file.
with open("structure.json", "r", encoding="utf-8") as file:
    data = json.load(file)

# Create the new structure: "byId" maps each generated id to its item and
# "allIds" records those ids in the order the items are processed.
new_structure = {"byId": {}, "allIds": []}
# Patterns used by fix_title_punctuation, compiled once at import time.
_SEMICOLON_SPACING = re.compile(r"\s*;\s*")
_HYPHEN_TRAILING_SPACE = re.compile(r"(\w)-\s+")


def fix_title_punctuation(title: str) -> str:
    """Normalize whitespace around semicolons and after hyphens in *title*.

    Two substitutions are applied, in this order:

    1. Every semicolon, together with any whitespace on either side of it,
       becomes ``"; "`` (no space before, exactly one space after). This also
       applies to a semicolon at the very end of the string, which therefore
       ends up followed by a single space.
    2. Wherever a hyphen directly follows a word character and is itself
       followed by whitespace, that whitespace run is collapsed to a single
       space. Hyphens preceded by whitespace (``"a - b"``) and hyphens with no
       whitespace after them (``"a-b"``) are left untouched.

    Args:
        title: The title text to clean up.

    Returns:
        The title with the spacing rules above applied.
    """
    fixed_title = _SEMICOLON_SPACING.sub("; ", title)
    return _HYPHEN_TRAILING_SPACE.sub(r"\1- ", fixed_title)
# Process each item in the original array.
last_index = len(data) - 1  # loop-invariant, so computed once up front
for i, item in enumerate(data):
    # Generate a new UUID; it is both the item's "id" and its key in "byId".
    new_id = str(uuid.uuid4())

    # Work on a shallow copy so adding top-level keys does not modify the
    # item held in the loaded data.
    new_item = item.copy()
    new_item["id"] = new_id
    new_item["index"] = i
    new_item["isFirst"] = i == 0
    new_item["isLast"] = i == last_index

    # Guarded like "title" below, so an item without "files" is passed
    # through instead of raising KeyError.
    if "files" in new_item:
        # NOTE(review): str.replace rewrites every "pdf" substring, not only a
        # trailing ".pdf" extension (e.g. "pdf-guide.pdf" -> "html-guide.html").
        # Confirm against the real file names whether that is intended.
        new_item["files"] = [f.replace("pdf", "html") for f in new_item["files"]]

    # Fix the title punctuation.
    if "title" in new_item:
        new_item["title"] = fix_title_punctuation(new_item["title"])

    # Add to the new structure.
    new_structure["byId"][new_id] = new_item
    new_structure["allIds"].append(new_id)
# Save the new structure to topics.json.
with open("topics.json", "w", encoding="utf-8") as file:
    # ensure_ascii=False writes non-ASCII characters as-is instead of
    # \uXXXX escapes; the file is opened as UTF-8 to match.
    json.dump(new_structure, file, ensure_ascii=False, indent=4)

print("topics.json created successfully with fixed punctuation")