mirror-gitea-github/gitea-github-mirror-santize.py

125 lines
4.2 KiB
Python

import requests
import subprocess
import os
import logging
import re
import unicodedata
from datetime import datetime
# Mirrors public Gitea repositories to GitHub, sanitizing each repository description on the way.
# ----------------- CONFIGURATION -----------------
# Every credential/identity setting can be supplied through an environment
# variable of the same name, so access tokens do not have to be written into
# this file. When a variable is unset the value falls back to the original
# empty-string placeholder, which must then be filled in by hand.
# A trailing "/" on GITEA_URL is dropped because "/api/v1/..." is appended to it.
GITEA_URL = os.environ.get("GITEA_URL", "").rstrip("/")
GITEA_TOKEN = os.environ.get("GITEA_TOKEN", "")
GITEA_USER = os.environ.get("GITEA_USER", "")
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN", "")
GITHUB_USER = os.environ.get("GITHUB_USER", "")  # your GitHub username/org
CLONE_DIR = "./gitea_repos"  # Temporary folder for cloning
# -------------------------------------------------
# Setup logging: every run writes to its own timestamped log file and echoes
# the same records to the console.
log_filename = f"mirror_log_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.FileHandler(log_filename),
        logging.StreamHandler(),
    ],
)
# Authorization header sent with every Gitea API request.
headers_gitea = {"Authorization": f"token {GITEA_TOKEN}"}
# Make sure the clone directory exists before any repository is cloned into it.
os.makedirs(CLONE_DIR, exist_ok=True)
# ----------------- SANITIZER -----------------
def sanitize_description(text: str) -> str:
    """Return *text* with control characters removed and whitespace collapsed.

    Every character in Unicode category ``Cc`` is replaced by a space. That
    category covers the C0 range (U+0000-U+001F, which includes tab, LF and
    CR), DEL (U+007F) and the C1 range (U+0080-U+009F). Runs of whitespace
    are then collapsed to a single space and the result is stripped, so the
    returned description is always a single line.

    Args:
        text: The raw description. ``None`` or an empty string is accepted.

    Returns:
        The cleaned description, or ``""`` when *text* is falsy.
    """
    if not text:
        return ""
    # One pass is enough: a separate ASCII-only regex would only replace a
    # subset of the characters that the "Cc" category check already covers.
    cleaned = "".join(
        " " if unicodedata.category(ch) == "Cc" else ch for ch in text
    )
    # Collapse whitespace runs (including the spaces just substituted in).
    return re.sub(r"\s+", " ", cleaned).strip()
# -------------------------------------------------
# Step 1: Get all public Gitea repos
def get_gitea_repos(timeout=30):
    """Return the public repositories listed for ``GITEA_USER`` on Gitea.

    Requests ``/api/v1/users/{GITEA_USER}/repos`` page by page, 100 entries
    per page, until an empty page comes back. Only entries whose ``private``
    field is false or missing are kept.

    Args:
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        A list of repository dicts as decoded from the JSON responses. If a
        request fails (network error, non-200 status or undecodable body) the
        error is logged and the repositories collected so far are returned.
    """
    logging.info("Fetching repositories from Gitea...")
    url = f"{GITEA_URL}/api/v1/users/{GITEA_USER}/repos"
    repos = []
    page = 1
    while True:
        try:
            # Without a timeout, requests waits indefinitely on a stalled server.
            r = requests.get(
                url,
                headers=headers_gitea,
                params={"page": page, "limit": 100},
                timeout=timeout,
            )
        except requests.RequestException as e:
            logging.error(f"Failed to fetch Gitea repos: {e}")
            break
        if r.status_code != 200:
            logging.error(f"Failed to fetch Gitea repos: {r.text}")
            break
        try:
            data = r.json()
        except ValueError as e:
            # JSON decode errors raised by requests derive from ValueError.
            logging.error(f"Failed to fetch Gitea repos: {e}")
            break
        if not data:
            # An empty page marks the end of the listing.
            break
        repos.extend(repo for repo in data if not repo.get("private", False))
        page += 1
    logging.info(f"Found {len(repos)} public repositories on Gitea.")
    return repos
# Step 2: Create GitHub repo
def create_github_repo(repo_name, description="", timeout=30):
    """Create a public GitHub repository, treating "already exists" as success.

    The description is passed through ``sanitize_description`` before it is
    sent to the API.

    NOTE(review): ``POST /user/repos`` creates the repository under the
    account that owns ``GITHUB_TOKEN``. If ``GITHUB_USER`` names an
    organisation, the push target used by ``mirror_repo`` would differ from
    where the repository is created - confirm for organisation setups.

    Args:
        repo_name: Name of the repository to create.
        description: Raw repository description; may be empty or ``None``.
        timeout: Seconds to wait on the HTTP request before giving up.

    Returns:
        ``True`` if the repository was created (HTTP 201) or already exists
        (HTTP 422 whose body contains "already exists"); ``False`` on any
        other response or when the request itself fails.
    """
    safe_desc = sanitize_description(description)
    url = "https://api.github.com/user/repos"
    payload = {"name": repo_name, "private": False, "description": safe_desc}
    headers = {"Authorization": f"token {GITHUB_TOKEN}"}
    try:
        # Without a timeout, requests waits indefinitely on a stalled server.
        r = requests.post(url, json=payload, headers=headers, timeout=timeout)
    except requests.RequestException as e:
        logging.error(f"Failed to create GitHub repo {repo_name}: {e}")
        return False
    if r.status_code == 201:
        logging.info(f"GitHub repo created: {repo_name}")
        return True
    if r.status_code == 422 and "already exists" in r.text:
        logging.info(f"GitHub repo already exists: {repo_name}")
        return True
    logging.error(f"Failed to create GitHub repo {repo_name}: {r.status_code} - {r.text}")
    return False
# Step 3: Mirror from Gitea → GitHub
def mirror_repo(repo_name, clone_url):
    """Mirror one repository from Gitea to GitHub through a local bare clone.

    On the first run the repository is cloned with ``git clone --mirror`` into
    ``CLONE_DIR/<repo_name>``; on later runs the existing clone is refreshed
    with ``git fetch``. The clone is then pushed to
    ``github.com/<GITHUB_USER>/<repo_name>.git`` with ``git push --mirror``.

    Args:
        repo_name: Repository name, used for the local directory and the
            GitHub repository path.
        clone_url: URL the repository is cloned from.

    Returns:
        None. A failing git command is logged rather than raised.
    """
    local_path = os.path.join(CLONE_DIR, repo_name)
    try:
        if os.path.exists(local_path):
            logging.info(f"Repo {repo_name} already cloned. Fetching updates...")
            # `git clone --mirror` creates a bare repository: the clone
            # directory itself is the git dir and has no ".git" subdirectory.
            # --prune drops refs deleted upstream so that the following
            # `push --mirror` removes them on GitHub as well.
            subprocess.run(
                ["git", "--git-dir", local_path, "fetch", "--all", "--prune"],
                check=True,
            )
        else:
            logging.info(f"Cloning {repo_name} from Gitea...")
            subprocess.run(["git", "clone", "--mirror", clone_url, local_path], check=True)
        github_url = f"https://{GITHUB_TOKEN}@github.com/{GITHUB_USER}/{repo_name}.git"
        logging.info(f"Pushing {repo_name} to GitHub...")
        subprocess.run(["git", "--git-dir", local_path, "push", "--mirror", github_url], check=True)
        logging.info(f"✅ Finished mirroring {repo_name}")
    except subprocess.CalledProcessError as e:
        # The exception text embeds the full command line, which for the push
        # includes the token-bearing URL; mask the token before logging it.
        detail = str(e)
        if GITHUB_TOKEN:
            detail = detail.replace(GITHUB_TOKEN, "***")
        logging.error(f"❌ Error mirroring {repo_name}: {detail}")
def main():
    """Mirror every public Gitea repository to GitHub, one at a time.

    For each repository returned by ``get_gitea_repos`` the GitHub
    repository is created (or confirmed to exist) first; the mirror step runs
    only when that succeeded.
    """
    for repo in get_gitea_repos():
        repo_name, description, clone_url = (
            repo["name"],
            repo.get("description", ""),
            repo["clone_url"],
        )
        logging.info(f"Processing repo: {repo_name}")
        if not create_github_repo(repo_name, description):
            # Nothing to push to; move on to the next repository.
            continue
        mirror_repo(repo_name, clone_url)
# Script entry point: run the mirror only when this file is executed
# directly, then report which log file this run wrote to.
if __name__ == "__main__":
    main()
    logging.info(f"All done! Log saved to {log_filename}")