57 lines
1.9 KiB
Python
57 lines
1.9 KiB
Python
import os
|
|
import re
|
|
import shutil
|
|
|
|
def clean_md_file(input_path, output_path):
    """Strip Hugo-specific markup from a Markdown post and save it as text.

    The file at ``input_path`` is read as UTF-8 and cleaned in this order:

    1. A leading YAML front matter block (``---`` ... ``---``) is removed.
    2. The ``{{< comments >}}`` shortcode and everything after it is removed.
    3. Every remaining ``{{< ... >}}`` shortcode is removed.
    4. Lines starting with the ``hacker@selfhost1:`` shell prompt are blanked.
    5. Runs of three or more newlines collapse to a single blank line, and
       leading/trailing whitespace is stripped.

    The result is written to ``output_path`` as UTF-8 (overwriting any
    existing file) and a progress line is printed.

    Args:
        input_path: Path of the Markdown file to read.
        output_path: Path of the cleaned text file to write.

    Raises:
        OSError: If the input cannot be opened or the output cannot be written.
        UnicodeDecodeError: If the input is not valid UTF-8.
    """
    with open(input_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Remove YAML front matter (with surrounding ---). Each repetition consumes
    # exactly one line, so a file that opens with '---' but never closes the
    # block fails fast instead of backtracking exponentially.
    content = re.sub(r'\A---\n(?:[^\n]*\n)*?---\n', '', content)

    # Remove the comments shortcode and everything after it. This must run
    # BEFORE the generic shortcode removal below; otherwise the generic pattern
    # deletes '{{< comments >}}' first and the trailing content is kept.
    content = re.sub(r'\{\{<\s*comments\s*>\}\}.*', '', content, flags=re.DOTALL)

    # Remove all remaining Hugo shortcodes like {{< ... >}} or {{< /... >}}
    content = re.sub(r'\{\{<[^>]*>\}\}', '', content)

    # Blank out shell prompt lines (the whole line is emptied, wherever it is)
    content = re.sub(r'^hacker@selfhost1:.*$', '', content, flags=re.MULTILINE)

    # Collapse runs of 3+ newlines to two newlines (at most one blank line)
    content = re.sub(r'\n{3,}', '\n\n', content)

    # Strip leading/trailing whitespace
    content = content.strip()

    # Write cleaned content
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(content)

    print(f"Processed: {input_path} -> {output_path}")
|
|
|
|
def batch_clean_md_files(
    input_dir='/var/www/html/professional-site/content/post',
    assets_dir='/var/www/html/professional-site/assets/summaries',
    static_dir='/var/www/html/professional-site/static/summaries',
):
    """Clean every ``.md`` file in ``input_dir`` into two summary folders.

    For each regular file in ``input_dir`` whose name ends with ``.md``, the
    cleaned text is written by ``clean_md_file`` to ``assets_dir`` under the
    same base name with a ``.txt`` extension, and that output is then copied
    to ``static_dir``. Both output directories are created if missing. The
    listing is not recursive.

    Args:
        input_dir: Directory containing the Markdown posts.
        assets_dir: Directory that receives the cleaned ``.txt`` files.
        static_dir: Directory that receives a copy of each cleaned file.

    Raises:
        OSError: If ``input_dir`` cannot be listed or a file cannot be
            read, written, or copied.
    """
    md_suffix = '.md'

    os.makedirs(assets_dir, exist_ok=True)
    os.makedirs(static_dir, exist_ok=True)

    # Sorted so the processing order (and log output) is deterministic.
    for filename in sorted(os.listdir(input_dir)):
        if not filename.endswith(md_suffix):
            continue

        input_path = os.path.join(input_dir, filename)
        # Skip directories (or other non-files) that merely end in '.md'.
        if not os.path.isfile(input_path):
            continue

        # Swap only the trailing extension; str.replace would also rewrite
        # any '.md' occurring earlier in the file name.
        output_filename = filename[:-len(md_suffix)] + '.txt'

        output_path_assets = os.path.join(assets_dir, output_filename)
        output_path_static = os.path.join(static_dir, output_filename)

        clean_md_file(input_path, output_path_assets)

        # Also copy the cleaned file to static folder
        shutil.copyfile(output_path_assets, output_path_static)
        print(f"Copied summary to static: {output_path_static}")
|
|
|
|
# Run the batch conversion. NOTE: there is no `if __name__ == "__main__":`
# guard here, so this call also executes whenever the module is imported.
batch_clean_md_files()
|
|
|