# spielplatz/redmine2gitea-wiki/redmine_to_gitea_wiki.py

import os
import requests
import re
from bs4 import BeautifulSoup
import subprocess
import logging

# Configure logging
# Runs at import time: configures the root logger at INFO level so the
# progress messages logged by the functions below are actually emitted.
logging.basicConfig(level=logging.INFO)
# Module-level logger shared by every function in this script.
logger = logging.getLogger(__name__)

# === Configuration ===
# Each setting can be overridden through the environment variable named in the
# corresponding os.environ.get() call; the literals are only fallbacks so that
# existing invocations keep working unchanged.
REDMINE_URL = os.environ.get("REDMINE_URL", "https://redmine.maketank.net")  # e.g., https://redmine.example.com
# SECURITY: the fallback below is a real credential committed in plain text.
# Prefer exporting REDMINE_API_KEY, and rotate this key since it has already
# been exposed through version control.
REDMINE_API_KEY = os.environ.get("REDMINE_API_KEY", "62120b7ed6bf84023b2a1d7d5a265cf172f8d607")
PROJECT_ID = os.environ.get("REDMINE_PROJECT_ID", "bastelstube")  # e.g., "my-project"

# Gitea wiki Git URL. The SSH form is used so that no username/token has to be
# stored in this file; the HTTPS alternative would be
# https://git.maketank.net/chaos/spielplatz.wiki.git
GITEA_WIKI_URL = os.environ.get(
    "GITEA_WIKI_URL",
    "ssh://git@git.maketank.net:2222/chaos/spielplatz.wiki.git",
)
# NOTE: a commented-out Gitea username and personal access token used to live
# here. They were unused and have been removed; revoke that token if it is
# still valid.

# Optional: Gitea API for creating pages (if you want to use it instead of Git)
# GITEA_API_URL = "https://gitea.example.com/api/v1"

# === Functions ===

def redmine_get_wiki_pages():
    """Fetch every wiki page of the configured Redmine project.

    The project's wiki index (``wiki/index.xml``) is requested first, then
    each listed page is fetched individually to obtain its text.

    Returns:
        A list of dicts with the keys ``"title"``, ``"parent"`` (the parent
        page's title, or ``None`` for root-level pages) and ``"content"``
        (the raw wiki markup; an empty string if the page could not be
        fetched or has no ``<text>`` element).

    Raises:
        requests.RequestException: if the index itself cannot be retrieved.
            Failures on individual pages are logged and do not abort the run.
    """
    from urllib.parse import quote

    # Without a timeout a stalled server would hang the migration forever.
    timeout_seconds = 30
    headers = {"X-Redmine-API-Key": REDMINE_API_KEY}

    # First get the list of pages and their parent information
    index_url = f"{REDMINE_URL}/projects/{PROJECT_ID}/wiki/index.xml"
    response = requests.get(index_url, headers=headers, timeout=timeout_seconds)
    response.raise_for_status()

    soup = BeautifulSoup(response.content, "xml")
    pages = []

    for page in soup.find_all("wiki_page"):
        title_tag = page.find("title")
        if title_tag is None or not title_tag.text:
            logger.warning("Skipping wiki index entry without a title")
            continue
        title = title_tag.text

        # Root level pages have no <parent> element.
        parent_tag = page.find("parent")
        parent = parent_tag.get("title") if parent_tag is not None else None

        logger.info(f"Found page: {title} (Parent: {parent})")

        # Percent-encode the title so characters such as '?', '#' or '/'
        # cannot change the meaning of the request URL.
        page_url = f"{REDMINE_URL}/projects/{PROJECT_ID}/wiki/{quote(title, safe='')}.xml"

        content = ""
        try:
            page_response = requests.get(page_url, headers=headers, timeout=timeout_seconds)
            page_response.raise_for_status()

            text_tag = BeautifulSoup(page_response.content, "xml").find("text")
            if text_tag is not None:
                content = text_tag.text
            logger.info(f"Fetched content for: {title}")
        except Exception as e:
            # Deliberately best-effort: one broken page must not abort the
            # whole export, so the page is kept with empty content.
            logger.warning(f"Failed to fetch content for page '{title}': {e}")

        pages.append({"title": title, "parent": parent, "content": content})

    return pages

def redmine_markup_to_markdown(content, pages):
    """Convert Redmine (Textile) wiki markup to Markdown.

    Args:
        content: Raw wiki markup of one page.
        pages: Mapping of page title -> page dict (each with a ``"title"``
            key). It is used to resolve ``[[wiki links]]``.

    Returns:
        The converted Markdown text.

    Conversions performed:
        * ``<pre>`` blocks become fenced code blocks; their content is left
          untouched by every other rule. A ``<code class="lang">`` wrapper
          supplies the fence language.
        * ``[[Page]]`` / ``[[Page|label]]`` become Markdown links when the
          page is known; unknown links are left as they are and logged.
        * ``*``/``#`` list items (any nesting depth) become ``-``/``1.`` items.
        * ``*strong*`` becomes ``**strong**``; ``**bold**`` is kept.
        * ``h1.`` .. ``h6.`` become ``#`` .. ``######`` headings.
        * ``!image!`` / ``!image(alt)!`` become ``![alt](image)``; images
          that already use Markdown syntax are kept unchanged.
        * ``{{...}}`` becomes inline code, HTML tags are removed and ``bq.``
          becomes a block quote.
    """
    # Normalise Windows line endings so the line-anchored rules below behave
    # the same regardless of how the page was saved.
    content = content.replace('\r\n', '\n')

    # --- Code blocks -------------------------------------------------------
    # Stash them first and restore them last: otherwise the inline rules
    # (lists, bold, HTML stripping, ...) would corrupt the code itself.
    code_blocks = []

    def stash_code_block(match):
        body = match.group(1)
        language = ''
        wrapped = re.fullmatch(
            r'\s*<code(?:\s+class="([^"]*)")?\s*>(.*)</code>\s*',
            body,
            flags=re.DOTALL,
        )
        if wrapped:
            language = wrapped.group(1) or ''
            body = wrapped.group(2)
        body = body.strip('\n')
        code_blocks.append(f"```{language}\n{body}\n```")
        # NUL cannot occur in XML-delivered text, so it is a safe marker.
        return f"\x00{len(code_blocks) - 1}\x00"

    content = re.sub(r'<pre>(.*?)</pre>', stash_code_block, content, flags=re.DOTALL)

    # --- Wiki links --------------------------------------------------------
    def replace_link(match):
        link = match.group(1)
        target, _, label = link.partition('|')
        target = target.strip()

        # Page titles may be stored with underscores instead of spaces, so
        # try that spelling as well before giving up.
        key = target if target in pages else target.replace(' ', '_')
        if key not in pages:
            logger.warning(f"Linked page not found: {link}")
            # Return original link if linked page not found
            return match.group()

        title = pages[key]['title']
        # The page titled 'Wiki' is written out as Home.md, so links to it
        # have to point there too.
        page_name = 'Home' if title == 'Wiki' else title.replace(' ', '_')
        return f"[{label.strip() or target}]({page_name}.md)"

    content = re.sub(r'\[\[(.*?)\]\]', replace_link, content)

    # --- Lists -------------------------------------------------------------
    # Must run before the heading rule: that rule emits lines starting with
    # '# ', which would otherwise be mistaken for numbered list items.
    # [ \t]* (not \s*) so the blank line in front of a list is preserved.
    def replace_list_item(match):
        markers, text = match.group(1), match.group(2)
        bullet = '-' if markers[0] == '*' else '1.'
        indent = '    ' * (len(markers) - 1)
        return f"{indent}{bullet} {text}"

    content = re.sub(r'^[ \t]*(\*+|#+) (.*)$', replace_list_item, content, flags=re.MULTILINE)

    # --- Bold --------------------------------------------------------------
    # Textile *strong* is Markdown **strong**; a single '*' pair in Markdown
    # would render as italics. Existing **bold** is left alone.
    content = re.sub(
        r'(?<![*\w])\*(?![*\s])([^*\n]+?)(?<!\s)\*(?![*\w])',
        r'**\1**',
        content,
    )

    # --- Headings ----------------------------------------------------------
    def replace_heading(match):
        return f"{'#' * int(match.group(1))} {match.group(2)}"

    content = re.sub(
        r'^h([1-6])\.[ \t]*(.*?)[ \t]*$',
        replace_heading,
        content,
        flags=re.MULTILINE,
    )

    # --- Images ------------------------------------------------------------
    # '(?!\[)' keeps images that are already in Markdown syntax untouched;
    # forbidding whitespace avoids treating "Wow! Really!" as an image.
    def replace_image(match):
        return f"![{match.group(2) or ''}]({match.group(1)})"

    content = re.sub(
        r'(?<!\w)!(?!\[)([^!\s()]+)(?:\(([^)\n]*)\))?!',
        replace_image,
        content,
    )

    # --- Inline code -------------------------------------------------------
    content = re.sub(r'\{\{(.*?)\}\}', r'`\1`', content)

    # --- Remaining HTML ----------------------------------------------------
    # Only strip things that look like tags or comments, so that prose such
    # as "a < b and c > d" survives.
    content = re.sub(r'<!--.*?-->|</?[A-Za-z][^>]*>', '', content, flags=re.DOTALL)

    # --- Block quotes ------------------------------------------------------
    content = re.sub(r'^bq\.[ \t]*(.*)$', r'> \1', content, flags=re.MULTILINE)

    # --- Restore the stashed code blocks -----------------------------------
    content = re.sub(r'\x00(\d+)\x00', lambda m: code_blocks[int(m.group(1))], content)

    return content

def download_image(image_url, local_dir):
    """Download an image from Redmine and store it under ``<local_dir>/images``.

    Args:
        image_url: Path of the image on the Redmine server; it is appended
            to ``REDMINE_URL`` to form the request URL.
        local_dir: Root directory of the local wiki checkout.

    Returns:
        The relative path ``./images/<filename>`` on success, or ``None`` if
        the download or the write failed (the failure is logged).
    """
    from urllib.parse import urlsplit

    # Use only the path component for the file name so a query string or
    # fragment never ends up in the name on disk.
    filename = os.path.basename(urlsplit(image_url).path)
    if not filename:
        logger.warning(f"Failed to download image from {image_url}: no file name in URL")
        return None

    try:
        headers = {"X-Redmine-API-Key": REDMINE_API_KEY}
        # The timeout keeps one unreachable image from blocking the whole run.
        response = requests.get(f"{REDMINE_URL}{image_url}", headers=headers, timeout=30)
        response.raise_for_status()

        # Create images directory if it doesn't exist
        images_dir = os.path.join(local_dir, "images")
        os.makedirs(images_dir, exist_ok=True)

        with open(os.path.join(images_dir, filename), 'wb') as f:
            f.write(response.content)
    except (requests.RequestException, OSError) as e:
        # Best-effort: a missing image is reported but must not stop the export.
        logger.warning(f"Failed to download image from {image_url}: {e}")
        return None

    logger.info(f"Downloaded image: {filename}")
    return f"./images/{filename}"

def clone_or_init_wiki_repo(repo_url, local_dir):
    """Make sure an up-to-date checkout of the Gitea wiki exists locally.

    If ``local_dir`` does not exist the repository is cloned into it;
    otherwise ``git pull`` is run inside the existing directory.

    Args:
        repo_url: Git URL of the wiki repository.
        local_dir: Directory of the local checkout.

    Returns:
        ``local_dir``, unchanged.

    Raises:
        subprocess.CalledProcessError: if ``git clone`` or ``git pull`` fails.
    """
    if os.path.exists(local_dir):
        logger.info(f"Using existing local wiki repo: {local_dir}")
        # Run git inside the checkout via cwd= instead of chdir()/chdir('..'):
        # that pair ends up in the wrong directory for nested or absolute
        # paths and leaves the process there if the pull fails.
        subprocess.run(["git", "pull"], cwd=local_dir, check=True)
    else:
        logger.info(f"Cloning Gitea wiki repository: {repo_url}")
        subprocess.run(["git", "clone", repo_url, local_dir], check=True)
    return local_dir

def push_to_gitea_wiki(local_dir, pages, branch="main"):
    """Write all pages as Markdown files into the wiki checkout and push it.

    Everything in ``local_dir`` except ``.git`` is deleted first, so the
    checkout afterwards contains exactly the imported pages (plus any
    downloaded images). All pages are written flat into ``local_dir``; the
    page titled ``Wiki`` becomes ``Home.md``.

    Args:
        local_dir: Directory of the local wiki checkout.
        pages: List of page dicts with ``"title"`` and ``"content"`` keys.
        branch: Remote branch to push to (defaults to ``"main"``).

    Raises:
        subprocess.CalledProcessError: if ``git add`` or ``git push`` fails.
            A failing ``git commit`` (e.g. nothing to commit) is tolerated.
    """
    import shutil

    page_map = {page['title']: page for page in pages}  # Map titles to their data
    logger.info(f"We are in: {os.getcwd()}")

    # Remove all existing files in the repo (except .git folder)
    for item in os.listdir(local_dir):
        if item == '.git':
            continue
        item_path = os.path.join(local_dir, item)
        if os.path.isdir(item_path) and not os.path.islink(item_path):
            shutil.rmtree(item_path)
        else:
            os.remove(item_path)

    # Textile image tags (!name!) are resolved on the RAW page content.
    # Searching after the Markdown conversion cannot work because the
    # converter has already rewritten the tags by then.
    image_tag_pattern = re.compile(r'!(?!\[)([^!\s]+)!')

    def replace_image_tag(match):
        # handle_image_tag returns either a Markdown image pointing at the
        # downloaded file or the original tag if nothing was downloaded.
        return handle_image_tag(match.group(1), local_dir)

    # Create pages as .md files
    for page in pages:
        title = page['title']
        filename = "Home.md" if title == 'Wiki' else f"{title.replace(' ', '_')}.md"
        filepath = os.path.join(local_dir, filename)

        source_with_images = image_tag_pattern.sub(replace_image_tag, page['content'])

        # Convert content to Markdown and handle links
        markdown_content = redmine_markup_to_markdown(source_with_images, page_map)

        # Write to file
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(f"# {title}\n\n")
            f.write(markdown_content)

        logger.info(f"Saved page: {filename}")

    # Add, commit, and push. cwd= is used instead of os.chdir() so the
    # working directory of the calling process is left untouched.
    subprocess.run(["git", "add", "."], cwd=local_dir, check=True)
    # Arguments are passed without a shell, so the message must not carry its
    # own quotes. check=False: committing fails when there is nothing new.
    subprocess.run(
        ["git", "commit", "-m", "Import Redmine wiki pages"],
        cwd=local_dir,
        check=False,
    )
    subprocess.run(["git", "push", "origin", branch], cwd=local_dir, check=True)

    logger.info("✅ Wiki pages pushed to Gitea!")

def handle_image_tag(image_name, local_dir):
    """Turn one ``!image!`` tag into a Markdown image backed by a local file.

    Args:
        image_name: The text found between the two ``!`` of the tag.
        local_dir: Root directory of the local wiki checkout, passed through
            to ``download_image``.

    Returns:
        ``![](<path>)`` with the path reported by ``download_image`` when the
        download succeeded; otherwise the tag in its original ``!name!``
        form (unsupported extension, name starting with a backslash, or
        failed download).
    """
    unchanged_tag = f"!{image_name}!"

    # Only names ending in .png, .jpg or .jpeg (case-insensitive) are treated
    # as images.
    if not image_name.lower().endswith(('.png', '.jpg', '.jpeg')):
        logger.warning(f"Invalid image extension in tag: {image_name}")
        return unchanged_tag

    # A leading backslash marks text that is not a real image reference.
    if not image_name or image_name.startswith('\\'):
        return unchanged_tag

    # NOTE(review): the download path is built from the bare file name;
    # Redmine attachment URLs presumably also need the attachment id -
    # confirm that this path resolves on the target server.
    image_url = f"/attachments/download/{image_name}"
    logger.info(f"Attempting to download image: {image_name}")

    local_path = download_image(image_url, local_dir)
    if not local_path:
        # Download failed: keep the tag as it was.
        return unchanged_tag
    return f"![]({local_path})"

def main():
    """Run the migration: fetch from Redmine, prepare the checkout, push."""
    wiki_checkout = "gitea_wiki_clone"

    # 1. Read every wiki page from the Redmine project.
    logger.info("Fetching Redmine wiki pages...")
    redmine_pages = redmine_get_wiki_pages()

    # 2. Clone the Gitea wiki repository, or update an existing checkout.
    clone_or_init_wiki_repo(GITEA_WIKI_URL, wiki_checkout)

    # 3. Write the pages as Markdown and push them to Gitea.
    push_to_gitea_wiki(wiki_checkout, redmine_pages)


if __name__ == "__main__":
    main()