"""Generate plain-text summaries from Hugo Markdown posts.

Each ``.md`` file in the input directory is stripped of its YAML
frontmatter, Hugo shortcodes, anything following the comments shortcode,
and shell prompt lines, then written as ``.txt`` into the assets directory
and copied into the static directory.
"""
import os
import re
import shutil

DEFAULT_INPUT_DIR = '/var/www/html/professional-site/content/post'
DEFAULT_ASSETS_DIR = '/var/www/html/professional-site/assets/summaries'
DEFAULT_STATIC_DIR = '/var/www/html/professional-site/static/summaries'

_MD_SUFFIX = '.md'
_TXT_SUFFIX = '.txt'

# Frontmatter: an opening '---' line at the very start of the text, up to
# the first following line that is exactly '---'.  The single lazy group
# keeps matching linear; a nested lazy repetition here backtracks
# exponentially when the closing fence is missing.
_FRONTMATTER_RE = re.compile(r'\A---\n(?:.*?\n)??---(?:\n|\Z)', re.DOTALL)
# The comments shortcode and everything that follows it.
_COMMENTS_TAIL_RE = re.compile(r'{{<\s*comments\s*>}}.*', re.DOTALL)
# Any Hugo shortcode such as {{< name args >}} or {{< /name >}}.
_SHORTCODE_RE = re.compile(r'{{<[^>]*>}}')
# Whole lines that start with the shell prompt.
_PROMPT_LINE_RE = re.compile(r'^hacker@selfhost1:.*$', re.MULTILINE)
# Runs of three or more newlines (i.e. two or more blank lines).
_EXTRA_NEWLINES_RE = re.compile(r'\n{3,}')


def _clean_markdown_text(content: str) -> str:
    """Return *content* with frontmatter, shortcodes and prompt lines removed."""
    content = _FRONTMATTER_RE.sub('', content)
    # Truncate at the comments shortcode BEFORE the generic shortcode pass;
    # otherwise that pass deletes the marker and the tail is never removed.
    content = _COMMENTS_TAIL_RE.sub('', content)
    content = _SHORTCODE_RE.sub('', content)
    content = _PROMPT_LINE_RE.sub('', content)
    # Collapse runs of blank lines down to a single blank line.
    content = _EXTRA_NEWLINES_RE.sub('\n\n', content)
    return content.strip()


def clean_md_file(input_path: str, output_path: str) -> None:
    """Clean one Markdown file and write the result as UTF-8 text.

    Args:
        input_path: Path of the Markdown file to read (UTF-8).
        output_path: Path the cleaned text is written to; overwritten if it
            already exists.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
    """
    with open(input_path, 'r', encoding='utf-8') as f:
        content = f.read()

    cleaned = _clean_markdown_text(content)

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(cleaned)

    print(f"Processed: {input_path} -> {output_path}")


def batch_clean_md_files(
    input_dir: str = DEFAULT_INPUT_DIR,
    assets_dir: str = DEFAULT_ASSETS_DIR,
    static_dir: str = DEFAULT_STATIC_DIR,
) -> None:
    """Clean every ``.md`` file in *input_dir* into both summary directories.

    Args:
        input_dir: Directory scanned (non-recursively) for ``.md`` files.
        assets_dir: Directory receiving the cleaned ``.txt`` files; created
            if missing.
        static_dir: Directory receiving a copy of each cleaned file; created
            if missing.

    Raises:
        OSError: If a directory cannot be created or listed, or a file cannot
            be read, written or copied.
    """
    os.makedirs(assets_dir, exist_ok=True)
    os.makedirs(static_dir, exist_ok=True)

    # Sorted so the processing order (and log output) is deterministic.
    for filename in sorted(os.listdir(input_dir)):
        if not filename.endswith(_MD_SUFFIX):
            continue
        input_path = os.path.join(input_dir, filename)
        if not os.path.isfile(input_path):
            # A directory whose name ends in .md cannot be opened as a post.
            continue

        # Swap only the trailing suffix; str.replace would also rewrite any
        # '.md' occurring earlier in the name.
        output_filename = filename[:-len(_MD_SUFFIX)] + _TXT_SUFFIX
        output_path_assets = os.path.join(assets_dir, output_filename)
        output_path_static = os.path.join(static_dir, output_filename)

        clean_md_file(input_path, output_path_assets)

        # Also copy the cleaned file to the static folder.
        shutil.copyfile(output_path_assets, output_path_static)
        print(f"Copied summary to static: {output_path_static}")


if __name__ == '__main__':
    batch_clean_md_files()