"""Mirror public Gitea repositories to GitHub, sanitizing descriptions."""

import logging
import os
import re
import subprocess
import unicodedata
from datetime import datetime

import requests

# ----------------- CONFIGURATION -----------------
GITEA_URL = ""
GITEA_TOKEN = ""
GITEA_USER = ""

GITHUB_TOKEN = ""
GITHUB_USER = ""  # your GitHub username/org

CLONE_DIR = "./gitea_repos"  # Temporary folder for cloning

# Seconds to wait on any single HTTP call; without a timeout `requests`
# blocks indefinitely on an unresponsive server.
REQUEST_TIMEOUT = 30
# -------------------------------------------------

# Setup logging: one timestamped log file per run, mirrored to the console.
log_filename = f"mirror_log_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.FileHandler(log_filename),
        logging.StreamHandler(),
    ],
)

headers_gitea = {"Authorization": f"token {GITEA_TOKEN}"}

os.makedirs(CLONE_DIR, exist_ok=True)


# ----------------- SANITIZER -----------------
def sanitize_description(text: str) -> str:
    """Return *text* as a single line free of control characters.

    Every character in Unicode category ``Cc`` (C0 and C1 controls,
    including tab, LF and CR) is replaced by a space, then runs of
    whitespace are collapsed to one space and the result is stripped.
    A falsy input (``None`` or ``""``) yields ``""``.
    """
    if not text:
        return ""

    # Replace rather than drop, so words separated only by a control
    # character (e.g. a newline) do not get glued together.
    cleaned = "".join(
        " " if unicodedata.category(ch) == "Cc" else ch for ch in text
    )

    # Collapse whitespace runs (including the spaces introduced above).
    return re.sub(r"\s+", " ", cleaned).strip()
# -------------------------------------------------


def _redact_token(text):
    """Mask the GitHub token in *text* so it never reaches the logs."""
    # Guard the empty token: "abc".replace("", "***") would mangle the text.
    return text.replace(GITHUB_TOKEN, "***") if GITHUB_TOKEN else text


# Step 1: Get all public Gitea repos
def get_gitea_repos():
    """Return the public repositories of ``GITEA_USER`` as a list of dicts.

    Pages through the Gitea API until an empty page is returned. On an HTTP
    or network failure the error is logged and whatever was collected so
    far is returned.
    """
    logging.info("Fetching repositories from Gitea...")
    repos = []
    page = 1
    url = f"{GITEA_URL}/api/v1/users/{GITEA_USER}/repos"

    while True:
        try:
            r = requests.get(
                url,
                headers=headers_gitea,
                params={"page": page, "limit": 100},
                timeout=REQUEST_TIMEOUT,
            )
        except requests.RequestException as exc:
            logging.error("Failed to fetch Gitea repos: %s", exc)
            break

        if r.status_code != 200:
            logging.error("Failed to fetch Gitea repos: %s", r.text)
            break

        data = r.json()
        if not data:
            # An empty page marks the end of the listing.
            break

        repos.extend(repo for repo in data if not repo.get("private", False))
        page += 1

    logging.info("Found %d public repositories on Gitea.", len(repos))
    return repos


# Step 2: Create GitHub repo
def create_github_repo(repo_name, description=""):
    """Create a public GitHub repository named *repo_name*.

    The description is passed through ``sanitize_description`` first.

    Returns:
        ``True`` if the repository was created or already exists,
        ``False`` on any other API or network failure.
    """
    safe_desc = sanitize_description(description)

    # NOTE(review): this endpoint always creates the repo under the token's
    # own account; if GITHUB_USER is an organization the later push target
    # will not match -- confirm the intended owner.
    url = "https://api.github.com/user/repos"
    payload = {"name": repo_name, "private": False, "description": safe_desc}
    headers = {"Authorization": f"token {GITHUB_TOKEN}"}

    try:
        r = requests.post(
            url, json=payload, headers=headers, timeout=REQUEST_TIMEOUT
        )
    except requests.RequestException as exc:
        logging.error("Failed to create GitHub repo %s: %s", repo_name, exc)
        return False

    if r.status_code == 201:
        logging.info("GitHub repo created: %s", repo_name)
        return True
    if r.status_code == 422 and "already exists" in r.text:
        logging.info("GitHub repo already exists: %s", repo_name)
        return True

    logging.error(
        "Failed to create GitHub repo %s: %s - %s",
        repo_name,
        r.status_code,
        r.text,
    )
    return False


# Step 3: Mirror from Gitea → GitHub
def mirror_repo(repo_name, clone_url):
    """Mirror one repository from Gitea to GitHub.

    Clones *clone_url* as a bare mirror into ``CLONE_DIR/repo_name`` (or
    refreshes an existing mirror) and pushes every ref to GitHub with
    ``git push --mirror``. Git failures are logged, not raised.
    """
    local_path = os.path.join(CLONE_DIR, repo_name)

    try:
        if os.path.exists(local_path):
            logging.info(
                "Repo %s already cloned. Fetching updates...", repo_name
            )
            # `git clone --mirror` creates a *bare* repository, so the git
            # directory is local_path itself -- there is no ".git" inside.
            # --prune drops refs deleted upstream so the mirror stays exact.
            subprocess.run(
                ["git", "--git-dir", local_path, "fetch", "--all", "--prune"],
                check=True,
            )
        else:
            logging.info("Cloning %s from Gitea...", repo_name)
            subprocess.run(
                ["git", "clone", "--mirror", clone_url, local_path],
                check=True,
            )

        github_url = (
            f"https://{GITHUB_TOKEN}@github.com/{GITHUB_USER}/{repo_name}.git"
        )

        logging.info("Pushing %s to GitHub...", repo_name)
        subprocess.run(
            ["git", "--git-dir", local_path, "push", "--mirror", github_url],
            check=True,
        )

        logging.info("✅ Finished mirroring %s", repo_name)

    except subprocess.CalledProcessError as e:
        # str(e) embeds the full command line, which for the push contains
        # the token-bearing URL; redact it before it is written to the log.
        logging.error(
            "❌ Error mirroring %s: %s", repo_name, _redact_token(str(e))
        )


def main():
    """Mirror every public Gitea repository of ``GITEA_USER`` to GitHub."""
    gitea_repos = get_gitea_repos()

    for repo in gitea_repos:
        repo_name = repo["name"]
        description = repo.get("description", "")
        clone_url = repo["clone_url"]

        logging.info("Processing repo: %s", repo_name)

        # Only push when the target repository exists (new or pre-existing).
        if create_github_repo(repo_name, description):
            mirror_repo(repo_name, clone_url)


if __name__ == "__main__":
    main()
    logging.info("All done! Log saved to %s", log_filename)