File size: 4,494 Bytes
1314bf5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#!/usr/bin/env python3
"""
File Upload Diagnostic Script
This script helps debug why some images are not being processed.
"""

import os
from pathlib import Path
from typing import Dict, List

def _collect_files(folder_path: str) -> List[Path]:
    """Return every file below ``folder_path`` (recursively) as a ``Path``.

    Directory and file names are visited in sorted order so that the report,
    and the choice of the "first" CSV file, do not depend on the order in
    which the filesystem happens to list entries.
    """
    collected = []
    for root, dirs, files in os.walk(folder_path):
        dirs.sort()  # sorting in place steers the order os.walk descends in
        collected.extend(Path(root, name) for name in sorted(files))
    return collected


def analyze_uploaded_files(folder_path: str) -> None:
    """
    Analyze uploaded files to understand why some images might not be processed.

    Prints a report to stdout listing every file found under the folder,
    which of them are recognised as images (by extension), which CSV files
    exist, and - when the first CSV can be read and has an 'Image Name'
    column - which of the names listed there match an image file by exact
    file name.

    Args:
        folder_path: Path to the uploaded folder. Anything that is not an
            existing directory (including a path to a regular file) is
            reported as "Folder not found" and nothing else is analysed.

    Returns:
        None. All results are printed.
    """
    print("πŸ” File Upload Diagnostic Tool")
    print("=" * 50)

    # isdir rather than exists: a path pointing at a regular file would
    # otherwise be walked as if it were an empty folder.
    if not os.path.isdir(folder_path):
        print(f"❌ Folder not found: {folder_path}")
        return

    # Get all files in the folder
    all_files = _collect_files(folder_path)

    print(f"πŸ“ Total files found: {len(all_files)}")
    print("\nπŸ“‹ All files:")
    for i, file_path in enumerate(all_files, 1):
        print(f"  {i}. {file_path.name} (ext: {file_path.suffix.lower()})")

    # Analyze image files
    image_exts = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff']
    print(f"\nπŸ–ΌοΈ  Looking for image extensions: {image_exts}")

    # Path.suffix is the final ".ext" component only, so a membership test
    # is equivalent to checking whether it ends with one of the extensions.
    image_files = [p for p in all_files if p.suffix.lower() in image_exts]
    non_image_files = [p for p in all_files if p.suffix.lower() not in image_exts]

    print(f"\nβœ… Image files detected ({len(image_files)}):")
    for i, img in enumerate(image_files, 1):
        print(f"  {i}. {img.name}")

    print(f"\nπŸ“„ Non-image files ({len(non_image_files)}):")
    for i, other in enumerate(non_image_files, 1):
        print(f"  {i}. {other.name} (ext: {other.suffix.lower()})")

    # Check for CSV files
    csv_files = [f for f in all_files if f.suffix.lower() == '.csv']
    print(f"\nπŸ“Š CSV files found ({len(csv_files)}):")
    for i, csv_path in enumerate(csv_files, 1):
        print(f"  {i}. {csv_path.name}")

    # Explicit state for the summary below, instead of probing locals().
    csv_readable = False      # the first CSV was parsed successfully
    has_name_column = False   # ...and it contains an 'Image Name' column
    existing_images = []      # CSV entries that match an image file name

    # If CSV exists, check its content (only the first CSV is inspected)
    if csv_files:
        try:
            # Imported lazily so the tool still runs without pandas when
            # the upload contains no CSV at all.
            import pandas as pd
            df = pd.read_csv(csv_files[0])
            csv_readable = True
            print(f"\nπŸ“ˆ CSV Analysis for '{csv_files[0].name}':")
            print(f"  - Rows: {len(df)}")
            print(f"  - Columns: {list(df.columns)}")

            if 'Image Name' in df.columns:
                has_name_column = True
                image_names_in_csv = df['Image Name'].tolist()
                print(f"  - Image names in CSV: {len(image_names_in_csv)}")

                # Check which images from CSV actually exist as files.
                # A set makes each lookup O(1) instead of rescanning the
                # whole image list for every CSV row.
                known_names = {img.name for img in image_files}
                existing_images = [n for n in image_names_in_csv if n in known_names]
                missing_images = [n for n in image_names_in_csv if n not in known_names]

                print("\nπŸ”— CSV-to-File Matching:")
                print(f"  - Images in CSV that exist as files: {len(existing_images)}")
                print(f"  - Images in CSV that are missing: {len(missing_images)}")

                if existing_images:
                    print("  βœ… Matching files:")
                    for name in existing_images:
                        print(f"    - {name}")

                if missing_images:
                    print("  ❌ Missing files:")
                    for name in missing_images:
                        print(f"    - {name}")

        except Exception as e:
            # Deliberately broad: this is a best-effort diagnostic, and a
            # missing pandas install or a malformed CSV should be reported,
            # not crash the rest of the report.
            print(f"  ❌ Error reading CSV: {e}")

    # Summary
    print("\nπŸ“Š SUMMARY:")
    print(f"  - Total files uploaded: {len(all_files)}")
    print(f"  - Image files detected: {len(image_files)}")
    print(f"  - CSV files: {len(csv_files)}")

    if not csv_files:
        print(f"  - No CSV - will process all {len(image_files)} images")
    elif not csv_readable:
        # A CSV is present but unusable; do not claim that there is none.
        print("  - CSV could not be read - CSV-to-file matching was skipped")
    elif has_name_column:
        print(f"  - Images that will be processed: {len(existing_images)}")
    else:
        print(f"  - CSV exists but no 'Image Name' column - will process all {len(image_files)} images")

if __name__ == "__main__":
    # Interactive entry point: prompt for the upload folder, trim surrounding
    # whitespace from the answer, and run the diagnostic report on it.
    print("Please provide the path to your uploaded folder:")
    analyze_uploaded_files(input("Folder path: ").strip())