From 61c58e632db905b098b5b12b267ef2f55a94a621 Mon Sep 17 00:00:00 2001 From: Udo Waechter Date: Thu, 8 Jan 2026 18:00:24 +0100 Subject: [PATCH] image detection not working --- redmine2gitea-wiki/redmine_to_gitea_wiki.py | 109 +++++++++++--------- 1 file changed, 61 insertions(+), 48 deletions(-) diff --git a/redmine2gitea-wiki/redmine_to_gitea_wiki.py b/redmine2gitea-wiki/redmine_to_gitea_wiki.py index 0c4244d..1a6aba0 100644 --- a/redmine2gitea-wiki/redmine_to_gitea_wiki.py +++ b/redmine2gitea-wiki/redmine_to_gitea_wiki.py @@ -3,7 +3,6 @@ import requests import re from bs4 import BeautifulSoup import subprocess -import time import logging # Configure logging @@ -51,10 +50,10 @@ def redmine_get_wiki_pages(): try: page_response = requests.get(page_url, headers=headers) page_response.raise_for_status() - + page_soup = BeautifulSoup(page_response.content, "xml") content = page_soup.find("text").text if page_soup.find("text") else "" - + pages.append({"title": title, "parent": parent, "content": content}) logger.info(f"Fetched content for: {title}") except Exception as e: @@ -70,18 +69,18 @@ def redmine_markup_to_markdown(content, pages): # Convert links - handle both [[page]] and [page] syntax def replace_link(match): link = match.group(1) - + if link in pages: # Ensure the linked page exists # Create a relative path based on parent-child hierarchy href = f"{pages[link]['title'].replace(' ', '_')}.md" - + return f"[{link}]({href})" - + else: logger.warning(f"Linked page not found: {link}") # Return original link if linked page not found return match.group() - + # Replace [[page]] with [page](page_url) content = re.sub(r'\[\[(.*?)\]\]', replace_link, content) @@ -107,7 +106,7 @@ def redmine_markup_to_markdown(content, pages): # Convert code blocks content = re.sub(r'
(.*?)
', r'```python\n\1\n```', content, flags=re.DOTALL) - + # Convert inline code content = re.sub(r'\{\{(.*?)\}\}', r'`\1`', content) @@ -125,17 +124,17 @@ def download_image(image_url, local_dir): headers = {"X-Redmine-API-Key": REDMINE_API_KEY} response = requests.get(f"{REDMINE_URL}{image_url}", headers=headers) response.raise_for_status() - + # Extract filename from URL filename = os.path.basename(image_url) filepath = os.path.join(local_dir, "images", filename) - + # Create images directory if it doesn't exist os.makedirs(os.path.dirname(filepath), exist_ok=True) - + with open(filepath, 'wb') as f: f.write(response.content) - + logger.info(f"Downloaded image: {filename}") return f"./images/{filename}" except Exception as e: @@ -168,7 +167,7 @@ def push_to_gitea_wiki(local_dir, pages): elif os.path.isdir(item_path): import shutil shutil.rmtree(item_path) - + # Create pages as .md files for page in pages: title = page['title'] @@ -178,7 +177,7 @@ def push_to_gitea_wiki(local_dir, pages): filename = "Home.md" else: filename = f"{title.replace(' ', '_')}.md" - + filepath = os.path.join(local_dir, filename) if parent and parent in page_map: @@ -190,70 +189,84 @@ def push_to_gitea_wiki(local_dir, pages): # Parent directory exists, create subdirectory for this page page_subdir = os.path.dirname(filepath) os.makedirs(page_subdir, exist_ok=True) - + else: # This is a root level page, no need to create subdirectories pass # Convert content to Markdown and handle links markdown_content = redmine_markup_to_markdown(page['content'], page_map) - + # Find all images in the content to log them image_pattern = r'!(.*?)!' found_images = re.findall(image_pattern, markdown_content) if found_images: logger.info(f"Found images on page '{title}': {found_images}") - - # Download images referenced in the content - # First, we need to find all image URLs and download them - # Look for Redmine-style image tags like !image.png! - def replace_image(match): - image_name = match.group(1) - - # Try to find the actual image URL by searching for it in the page content - # This is a simplified approach - we'll assume the image name matches - # what's found in Redmine's image system - if image_name: - # In Redmine, images are usually at /attachments/... path - image_url = f"/attachments/download/{image_name}" - logger.info(f"Attempting to download image: {image_name}") - - downloaded_path = download_image(image_url, local_dir) - if downloaded_path: - return f"![]({downloaded_path})" - else: - # If download fails, keep the original format - return f"!{image_name}!" - return match.group(0) - - # We need to handle both !image.png! and ![alt](image) formats - # For now, we'll process !image.png! patterns specifically - markdown_content = re.sub(image_pattern, replace_image, markdown_content) - + + # Process all !image! patterns in the content + original_content = markdown_content + + # Find all image tags in the format !image_name! + image_tags = re.findall(r'!(.*?)!', original_content) + + # For each image tag, attempt to download it and replace it with Markdown syntax + for image_tag in image_tags: + if image_tag and not image_tag.startswith('\\'): + logger.info(f"Found image tag: {image_tag}") + # Replace the specific tag with our download logic + markdown_content = markdown_content.replace( + f"!{image_tag}!", + handle_image_tag(image_tag, local_dir) + ) + # Write to file with open(filepath, 'w', encoding='utf-8') as f: f.write(f"# {title}\n\n") f.write(markdown_content) - + logger.info(f"Saved page: {filename}") - + # Add, commit, and push os.chdir(local_dir) subprocess.run(["git", "add", "."], check=True) subprocess.run(["git", "commit", "-m", "'Import Redmine wiki pages'"], check=False) subprocess.run(["git", "push", "origin", "main"], check=True) - + logger.info("✅ Wiki pages pushed to Gitea!") +def handle_image_tag(image_name, local_dir): + """Handle replacement of image tags with downloaded images""" + # Check if the image has a valid extension (.png, .jpg, or .jpeg) + valid_extensions = ('.png', '.jpg', '.jpeg') + if not any(image_name.lower().endswith(ext) for ext in valid_extensions): + logger.warning(f"Invalid image extension in tag: {image_name}") + return f"!{image_name}!" # Return original format for invalid extensions + + if image_name and not image_name.startswith('\\'): + # In Redmine, images are usually at /attachments/... path + image_url = f"/attachments/download/{image_name}" + logger.info(f"Attempting to download image: {image_name}") + + # Try to download the image + downloaded_path = download_image(image_url, local_dir) + if downloaded_path: + return f"![]({downloaded_path})" + else: + # If download fails, keep the original format + return f"!{image_name}!" + else: + # This is likely a regex capture group that wasn't meant to be an image + return f"!{image_name}!" + def main(): # Step 1: Get wiki pages from Redmine logger.info("Fetching Redmine wiki pages...") pages = redmine_get_wiki_pages() - + # Step 2: Prepare local directory for Gitea wiki local_wiki_dir = "gitea_wiki_clone" clone_or_init_wiki_repo(GITEA_WIKI_URL, local_wiki_dir) - + # Step 3: Push pages to Gitea push_to_gitea_wiki(local_wiki_dir, pages)