From 61c58e632db905b098b5b12b267ef2f55a94a621 Mon Sep 17 00:00:00 2001
From: Udo Waechter <udo@maketank.net>
Date: Thu, 8 Jan 2026 18:00:24 +0100
Subject: [PATCH] image detection not working

---
 redmine2gitea-wiki/redmine_to_gitea_wiki.py | 109 +++++++++++---------
 1 file changed, 61 insertions(+), 48 deletions(-)
diff --git a/redmine2gitea-wiki/redmine_to_gitea_wiki.py b/redmine2gitea-wiki/redmine_to_gitea_wiki.py
index 0c4244d..1a6aba0 100644
--- a/redmine2gitea-wiki/redmine_to_gitea_wiki.py
+++ b/redmine2gitea-wiki/redmine_to_gitea_wiki.py
@@ -3,7 +3,6 @@ import requests
 import re
 from bs4 import BeautifulSoup
 import subprocess
-import time
 import logging
 
 # Configure logging
@@ -51,10 +50,10 @@ def redmine_get_wiki_pages():
         try:
             page_response = requests.get(page_url, headers=headers)
             page_response.raise_for_status()
-            
+
             page_soup = BeautifulSoup(page_response.content, "xml")
             content = page_soup.find("text").text if page_soup.find("text") else ""
-            
+
             pages.append({"title": title, "parent": parent, "content": content})
             logger.info(f"Fetched content for: {title}")
         except Exception as e:
@@ -70,18 +69,18 @@ def redmine_markup_to_markdown(content, pages):
     # Convert links - handle both [[page]] and [page] syntax
     def replace_link(match):
         link = match.group(1)
-        
+
         if link in pages:  # Ensure the linked page exists
             # Create a relative path based on parent-child hierarchy
             href = f"{pages[link]['title'].replace(' ', '_')}.md"
-            
+
             return f"[{link}]({href})"
-        
+
         else:
             logger.warning(f"Linked page not found: {link}")
             # Return original link if linked page not found
             return match.group()
-    
+
     # Replace [[page]] with [page](page_url)
     content = re.sub(r'\[\[(.*?)\]\]', replace_link, content)
 
@@ -107,7 +106,7 @@ def redmine_markup_to_markdown(content, pages):
 
     # Convert code blocks
     content = re.sub(r'<pre>(.*?)</pre>', r'```python\n\1\n```', content, flags=re.DOTALL)
-    
+
     # Convert inline code
     content = re.sub(r'\{\{(.*?)\}\}', r'`\1`', content)
 
@@ -125,17 +124,17 @@ def download_image(image_url, local_dir):
         headers = {"X-Redmine-API-Key": REDMINE_API_KEY}
         response = requests.get(f"{REDMINE_URL}{image_url}", headers=headers)
         response.raise_for_status()
-        
+
         # Extract filename from URL
         filename = os.path.basename(image_url)
         filepath = os.path.join(local_dir, "images", filename)
-        
+
         # Create images directory if it doesn't exist
         os.makedirs(os.path.dirname(filepath), exist_ok=True)
-        
+
         with open(filepath, 'wb') as f:
             f.write(response.content)
-            
+
         logger.info(f"Downloaded image: {filename}")
         return f"./images/{filename}"
     except Exception as e:
@@ -168,7 +167,7 @@ def push_to_gitea_wiki(local_dir, pages):
             elif os.path.isdir(item_path):
                 import shutil
                 shutil.rmtree(item_path)
-    
+
     # Create pages as .md files
     for page in pages:
         title = page['title']
@@ -178,7 +177,7 @@ def push_to_gitea_wiki(local_dir, pages):
             filename = "Home.md"
         else:
             filename = f"{title.replace(' ', '_')}.md"
-        
+
         filepath = os.path.join(local_dir, filename)
 
         if parent and parent in page_map:
@@ -190,70 +189,84 @@ def push_to_gitea_wiki(local_dir, pages):
                 # Parent directory exists, create subdirectory for this page
                 page_subdir = os.path.dirname(filepath)
                 os.makedirs(page_subdir, exist_ok=True)
-            
+
         else:
             # This is a root level page, no need to create subdirectories
             pass
 
         # Convert content to Markdown and handle links
         markdown_content = redmine_markup_to_markdown(page['content'], page_map)
-        
+
         # Find all images in the content to log them
         image_pattern = r'!(.*?)!'
         found_images = re.findall(image_pattern, markdown_content)
         if found_images:
             logger.info(f"Found images on page '{title}': {found_images}")
-        
-        # Download images referenced in the content
-        # First, we need to find all image URLs and download them
-        # Look for Redmine-style image tags like !image.png!
-        def replace_image(match):
-            image_name = match.group(1)
-            
-            # Try to find the actual image URL by searching for it in the page content
-            # This is a simplified approach - we'll assume the image name matches 
-            # what's found in Redmine's image system
-            if image_name:
-                # In Redmine, images are usually at /attachments/... path
-                image_url = f"/attachments/download/{image_name}"
-                logger.info(f"Attempting to download image: {image_name}")
-                
-                downloaded_path = download_image(image_url, local_dir)
-                if downloaded_path:
-                    return f"![]({downloaded_path})"
-                else:
-                    # If download fails, keep the original format
-                    return f"!{image_name}!"
-            return match.group(0)
-        
-        # We need to handle both !image.png! and ![alt](image) formats
-        # For now, we'll process !image.png! patterns specifically
-        markdown_content = re.sub(image_pattern, replace_image, markdown_content)
-        
+
+        # Process all !image! patterns in the content
+        original_content = markdown_content
+
+        # Find all image tags in the format !image_name!
+        image_tags = re.findall(r'!(.*?)!', original_content)
+
+        # For each image tag, attempt to download it and replace it with Markdown syntax
+        for image_tag in image_tags:
+            if image_tag and not image_tag.startswith('\\'):
+                logger.info(f"Found image tag: {image_tag}")
+                # Replace the specific tag with our download logic
+                markdown_content = markdown_content.replace(
+                    f"!{image_tag}!",
+                    handle_image_tag(image_tag, local_dir)
+                )
+
         # Write to file
         with open(filepath, 'w', encoding='utf-8') as f:
             f.write(f"# {title}\n\n")
             f.write(markdown_content)
-        
+
         logger.info(f"Saved page: {filename}")
-    
+
     # Add, commit, and push
     os.chdir(local_dir)
     subprocess.run(["git", "add", "."], check=True)
     subprocess.run(["git", "commit", "-m", "'Import Redmine wiki pages'"], check=False)
     subprocess.run(["git", "push", "origin", "main"], check=True)
-    
+
     logger.info("✅ Wiki pages pushed to Gitea!")
 
+def handle_image_tag(image_name, local_dir):
+    """Turn one Redmine ``!image!`` tag into a Markdown image link.
+
+    The tag is handed back unchanged (``!image_name!``) when the name does
+    not end in .png/.jpg/.jpeg, when it starts with a backslash, or when
+    download_image() returns a falsy path; otherwise the image is fetched
+    via download_image() and ``![](<downloaded path>)`` is returned.
+    """
+    unchanged_tag = f"!{image_name}!"
+
+    # Only names carrying one of the accepted suffixes are treated as images
+    if not image_name.lower().endswith(('.png', '.jpg', '.jpeg')):
+        logger.warning(f"Invalid image extension in tag: {image_name}")
+        return unchanged_tag
+
+    # A leading backslash likely marks a match that was not meant as an image
+    if not image_name or image_name.startswith('\\'):
+        return unchanged_tag
+
+    # In Redmine, images are usually at /attachments/... path
+    logger.info(f"Attempting to download image: {image_name}")
+    saved_path = download_image(f"/attachments/download/{image_name}", local_dir)
+    return f"![]({saved_path})" if saved_path else unchanged_tag
+
 def main():
     # Step 1: Get wiki pages from Redmine
     logger.info("Fetching Redmine wiki pages...")
     pages = redmine_get_wiki_pages()
-    
+
     # Step 2: Prepare local directory for Gitea wiki
     local_wiki_dir = "gitea_wiki_clone"
     clone_or_init_wiki_repo(GITEA_WIKI_URL, local_wiki_dir)
-    
+
     # Step 3: Push pages to Gitea
     push_to_gitea_wiki(local_wiki_dir, pages)