ETSY LABEL AUTOMATION CODE

import sys
import re
import os
import tkinter as tk
from tkinter import filedialog, messagebox
import io
from pypdf import PdfReader, PdfWriter
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter

# --- CONFIGURATION ---
# Page size of the overlay canvas, in PDF points (288 pt = 4 in, so 72 pt per inch).
LABEL_WIDTH = 288  # 4 inches
LABEL_HEIGHT = 432 # 6 inches

# --- LAYOUT SETTINGS (V5 Adjusted) ---
# The info box is anchored at the bottom-left corner of the label.
# Height reduced to 42pts to stay below tracking numbers.
# Width limited to 200pts to avoid the QR code.
BOX_X = 3  # x coordinate passed to can.rect() for the box
BOX_Y = 3  # y coordinate passed to can.rect() for the box
BOX_WIDTH = 200
BOX_HEIGHT = 42

def extract_order_data(packing_slip_path):
    """
    Reads the Packing Slip PDF and extracts, for every page:
    1. Customer Name (for safety check)
    2. SKUs and Quantities

    Every page is treated as one order. A "SKU:" line opens an item and the
    next quantity line (e.g. "1 x $47.98") closes it. An item whose quantity
    line never shows up (another "SKU:" line or the end of the page comes
    first) is kept with a quantity of 1 instead of being dropped, so no SKU
    that appears on the slip is missing from the label.

    Args:
        packing_slip_path: Location of the packing-slip PDF, passed straight
            to PdfReader.

    Returns:
        A list with one dictionary per page, in page order:
        {'name': <customer name or "Unknown">, 'items': ["(qty) SKU", ...]}.
    """
    reader = PdfReader(packing_slip_path)
    print(f"Reading {len(reader.pages)} packing slips...")

    orders = []

    for page in reader.pages:
        # Treat "no text" as an empty page rather than crashing on split().
        text = page.extract_text() or ""
        lines = text.split('\n')

        # --- 1. Extract Name (Visual Safety Check) ---
        customer_name = "Unknown"
        for idx, line in enumerate(lines):
            if "Ship to" in line:
                # The name is usually on the very next line
                if idx + 1 < len(lines):
                    customer_name = lines[idx + 1].strip()
                break

        # --- 2. Extract Items ---
        items_found = []
        current_sku = None  # SKU that is still waiting for its quantity line

        for line in lines:
            # Look for SKU
            if "SKU:" in line:
                if current_sku:
                    # The previous SKU never got a quantity line: keep it
                    # with the default quantity instead of overwriting it.
                    items_found.append(f"(1) {current_sku}")
                current_sku = line.replace("SKU:", "").strip()

            # Look for Quantity (e.g., "1 x $47.98")
            if current_sku and (" x $" in line or line.strip().endswith(" x")):
                qty_match = re.search(r'(\d+)\s*x', line)
                qty = qty_match.group(1) if qty_match else "1"

                items_found.append(f"({qty}) {current_sku}")
                current_sku = None

        # A SKU at the end of the page without a quantity line defaults to 1.
        if current_sku:
            items_found.append(f"(1) {current_sku}")

        orders.append({
            'name': customer_name,
            'items': items_found
        })

    return orders

def create_overlay(order_data):
    """
    Creates the overlay with Customer Name (Safety) + Items

    A white box with a thin black border is drawn at (BOX_X, BOX_Y). Its
    first row reads "Order: <name>" (name cut to 35 characters), followed by
    a separator line and one row per entry of order_data['items'].

    The item font starts at 9pt and shrinks, down to a 5pt floor, so all rows
    fit between the separator and the bottom of the box. When the rows do not
    fit even at the floor size, only as many rows as fit are drawn and the
    last one becomes a "+N more" notice, so nothing is drawn outside the box.

    Args:
        order_data: Dictionary with 'name' (str) and 'items' (list of str).

    Returns:
        A PdfReader over the in-memory overlay PDF (callers merge its first
        page onto the shipping label).
    """
    packet = io.BytesIO()
    can = canvas.Canvas(packet, pagesize=(LABEL_WIDTH, LABEL_HEIGHT))

    # --- Draw Background Box ---
    can.setFillColor("white")
    can.setStrokeColor("black")
    # White box with thin black border
    can.rect(BOX_X, BOX_Y, BOX_WIDTH, BOX_HEIGHT, fill=1, stroke=1)

    # --- Draw Safety Name Header ---
    can.setFillColor("black")
    header_size = 7
    can.setFont("Helvetica-Bold", header_size)

    # Name Position: Top of the box, padded slightly down
    name_y = BOX_Y + BOX_HEIGHT - header_size - 3
    can.drawString(BOX_X + 4, name_y, f"Order: {order_data['name'][:35]}")  # Limit name length

    # --- Draw Separator Line ---
    line_y = name_y - 3
    can.setLineWidth(0.5)
    can.line(BOX_X, line_y, BOX_X + BOX_WIDTH, line_y)

    # --- Draw Items ---
    sku_list = list(order_data['items'])
    num_lines = len(sku_list)

    # Calculate available vertical space for items
    # Bottom of box to the separator line
    available_item_height = line_y - BOX_Y - 2

    # Dynamic Sizing for Items
    # We try to stick to 9pt, but shrink if there are many items
    optimal_size = 9
    if num_lines > 0:
        # Calculate needed size to fit all lines
        calculated_size = available_item_height / (num_lines * 1.05)
        optimal_size = min(9, calculated_size)

    optimal_size = max(5, optimal_size)  # Hard floor at 5pt to keep it readable
    can.setFont("Helvetica-Bold", optimal_size)

    line_height = optimal_size * 1.05

    # Start drawing items just below the line
    text_y = line_y - optimal_size - 2

    # Because of the 5pt floor the rows may need more room than the box has.
    # Count how many baselines stay at or above the bottom edge of the box
    # and replace the overflow with a single notice row.
    max_lines = max(1, int((text_y - BOX_Y) / line_height) + 1)
    if num_lines > max_lines:
        hidden = num_lines - (max_lines - 1)
        sku_list = sku_list[:max_lines - 1] + [f"+{hidden} more - see packing slip"]

    # Horizontal fit: Truncate if too long
    # Approx char width = size * 0.55
    max_chars = int((BOX_WIDTH - 6) / (optimal_size * 0.55))

    for item in sku_list:
        if len(item) > max_chars:
            item = item[:max_chars - 2] + ".."

        can.drawString(BOX_X + 4, text_y, item)
        text_y -= line_height  # Move cursor down

    can.save()
    packet.seek(0)
    return PdfReader(packet)

def select_file(title, filetypes):
    """
    Show an "open file" dialog and return what the user picked.

    A Tk root window is created and immediately withdrawn so that only the
    dialog itself is shown. Callers treat a falsy result as "cancelled".
    """
    hidden_root = tk.Tk()
    hidden_root.withdraw()
    chosen_path = filedialog.askopenfilename(title=title, filetypes=filetypes)
    return chosen_path

def save_file(title, default_name):
    """
    Show a "save as" dialog restricted to PDF files and return the target path.

    default_name is offered as the initial file name and ".pdf" is the
    default extension. Callers treat a falsy result as "cancelled".
    """
    hidden_root = tk.Tk()
    hidden_root.withdraw()
    dialog_options = {
        "title": title,
        "initialfile": default_name,
        "defaultextension": ".pdf",
        "filetypes": [("PDF Files", "*.pdf")],
    }
    return filedialog.asksaveasfilename(**dialog_options)

def main():
    """
    Interactive entry point of the Etsy label packer.

    Asks for the packing-slip PDF and the shipping-label PDF, pairs slip page
    N with label page N, stamps each label with the overlay built from its
    slip and finally asks where to save the merged PDF. If the two page
    counts differ the user is asked whether to continue; only the first
    min(slips, labels) pages are then processed. Any exception is shown in
    an error dialog and echoed to the console.
    """
    print("--- SEQUENTIAL LABEL PACKER V5 (Compact Layout) ---")

    pdf_only = [("PDF Files", "*.pdf")]

    slip_path = select_file("1. Select PACKING SLIPS", pdf_only)
    if not slip_path:
        return

    label_path = select_file("2. Select SHIPPING LABELS", pdf_only)
    if not label_path:
        return

    try:
        # 1. Get Data
        orders = extract_order_data(slip_path)
        label_reader = PdfReader(label_path)
        writer = PdfWriter()

        # 2. Validation
        slip_count, label_count = len(orders), len(label_reader.pages)

        if slip_count != label_count:
            warning = (
                f"Count Mismatch!\nSlips: {slip_count}\nLabels: {label_count}\n\n"
                "The script requires a 1-to-1 match. Proceed with caution?"
            )
            proceed = messagebox.askyesno("Warning", warning)
            if not proceed:
                return

        # 3. Process
        print("Merging labels sequentially...")
        count = min(slip_count, label_count)

        # zip() stops at the shorter sequence, i.e. after `count` pairs.
        for order_data, label_page in zip(orders, label_reader.pages):
            stamp = create_overlay(order_data).pages[0]
            label_page.merge_page(stamp)
            writer.add_page(label_page)

        # 4. Save
        output_path = save_file("3. Save Ready-to-Print Labels", "Final_Labels_V5")
        if not output_path:
            return

        with open(output_path, "wb") as out_file:
            writer.write(out_file)
        messagebox.showinfo("Success", f"Created {count} labels.\n\nSAFETY CHECK:\nAlways verify the 'Order:' name matches the Ship To name.")

    except Exception as e:
        messagebox.showerror("Error", f"An error occurred:\n{str(e)}")
        print(f"Critical Error: {e}")


if __name__ == "__main__":
    main()

AMAZON LABEL AUTOMATION CODE

import sys
import re
import os
import tkinter as tk
from tkinter import filedialog, messagebox
import io
import subprocess

# --- 1. CONFIGURATION & AUTO-DISCOVERY ---
# Page size handed to canvas.Canvas(pagesize=...) in create_overlay.
# (The Etsy script in this file labels the same 288 x 432 values as 4 x 6 inches.)
LABEL_WIDTH = 288
LABEL_HEIGHT = 432
# Position and size of the white info box drawn with can.rect() in create_overlay.
BOX_X = 5
BOX_Y = 5
BOX_WIDTH = 278
BOX_HEIGHT = 50

# Global paths
# Filled in by find_ocr_tools(); None means "not located".
TESSERACT_EXE = None
POPPLER_BIN = None

def find_ocr_tools():
    """
    Hunts for Tesseract and Poppler on the C: drive to fix pathing errors.

    Tesseract is looked up in a fixed list of install locations. Poppler is
    looked up at C:\\poppler-24.08.0\\Library\\bin first and then in any
    top-level folder of C:\\ whose name contains "poppler" and that has a
    Library\\bin sub-folder. Results are stored in the module-level
    TESSERACT_EXE and POPPLER_BIN and progress is printed to the console.

    Returns:
        True when both tools were located, False otherwise.
    """
    global TESSERACT_EXE, POPPLER_BIN
    print("--- DIAGNOSTIC: FINDING OCR TOOLS ---")

    # 1. Look for Tesseract
    possible_tess = [
        r"C:\Program Files\Tesseract-OCR\tesseract.exe",
        r"C:\Program Files (x86)\Tesseract-OCR\tesseract.exe",
        r"C:\Tesseract-OCR\tesseract.exe"
    ]
    for p in possible_tess:
        if os.path.exists(p):
            TESSERACT_EXE = p
            print(f"✅ FOUND TESSERACT: {p}")
            break

    if not TESSERACT_EXE:
        print("❌ CRITICAL: Tesseract EXE not found.")
        print("   -> Install from: https://github.com/UB-Mannheim/tesseract/wiki")

    # 2. Look for Poppler
    # Check standard path first
    if os.path.exists(r"C:\poppler-24.08.0\Library\bin"):
        POPPLER_BIN = r"C:\poppler-24.08.0\Library\bin"
    else:
        # Search C root for any folder whose name contains 'poppler'
        try:
            root_dirs = os.listdir("C:\\")
        except OSError as e:
            # Only a failed directory listing is tolerated here (drive missing,
            # access denied); anything else should surface as a real error.
            print(f"   -> Could not list C:\\ while searching for Poppler: {e}")
            root_dirs = []

        for d in root_dirs:
            if "poppler" in d.lower():
                candidate = os.path.join("C:\\", d, "Library", "bin")
                if os.path.exists(candidate):
                    POPPLER_BIN = candidate
                    break

    if POPPLER_BIN:
        print(f"✅ FOUND POPPLER:   {POPPLER_BIN}")
    else:
        print("❌ CRITICAL: Poppler bin folder not found.")
        print("   -> Ensure you extracted the zip to C:\\ and the folder name contains 'poppler'.")

    return (TESSERACT_EXE is not None) and (POPPLER_BIN is not None)

# --- IMPORTS THAT MIGHT FAIL ---
# The third-party packages are imported inside a guard so that a missing
# package produces an install hint instead of a bare traceback.
try:
    from pypdf import PdfReader, PdfWriter
    from reportlab.pdfgen import canvas
    from reportlab.lib.pagesizes import letter
    import pytesseract
    from pdf2image import convert_from_path
except ImportError as e:
    print(f"\n❌ LIBRARY ERROR: {e}")
    print("   -> Run this command in PowerShell:  & C:/ProgramData/miniconda3/python.exe -m pip install pytesseract pdf2image pypdf reportlab")
    # Exit with a non-zero status so a calling shell or batch file can tell
    # that the script did not run.
    sys.exit(1)

# --- PROCESSING LOGIC ---

def clean_ocr_text(text):
    """Clean up OCR garbage.

    The text is upper-cased and, inside digit-like words only, letters that
    OCR commonly confuses with digits are replaced: O -> 0, I -> 1, L -> 1.
    A word counts as digit-like when it consists solely of digits and those
    three letters and contains at least one digit (e.g. "9O21O" -> "90210").
    Ordinary words such as "OHIO" or "OIL" are left untouched so that name
    matching keeps working.

    Args:
        text: Raw OCR output; may be None or empty.

    Returns:
        The cleaned, upper-cased text, or "" for empty input.
    """
    if not text:
        return ""
    text = text.upper()

    # After upper() a misread lowercase "l" is "L", so the table maps "L".
    digit_fix = str.maketrans("OIL", "011")

    # The lookahead demands at least one real digit in the word; the word
    # boundaries make sure the whole word is digit-like, not just a part.
    return re.sub(
        r'\b(?=[OIL]*[0-9])[0-9OIL]+\b',
        lambda found: found.group(0).translate(digit_fix),
        text,
    )

def get_tokens_and_numbers(text):
    """
    Split text into matching material: (word tokens, digit runs).

    The text is upper-cased and newlines, hyphens and periods become spaces.
    Digit runs are every maximal sequence of digits. Word tokens are the
    alphanumeric runs that are longer than two characters and are not one of
    the shipping-label boilerplate words listed below.

    Returns a pair of empty sets for empty or missing input.
    """
    if not text:
        return set(), set()

    boilerplate = {'SHIP', 'TO', 'USPS', 'GROUND', 'ADVANTAGE', 'POSTAGE', 'PAID', 'WEIGHT', 'OZ', 'LB', 'ZONE', 'COMMERCIAL', 'BASE', 'PRIORITY', 'MAIL'}

    # One pass that turns the three separator characters into spaces.
    separators = str.maketrans({'\n': ' ', '-': ' ', '.': ' '})
    normalized = text.upper().translate(separators)

    digit_runs = set(re.findall(r'\d+', normalized))

    words = set()
    for candidate in re.findall(r'[A-Z0-9]+', normalized):
        if len(candidate) <= 2 or candidate in boilerplate:
            continue
        words.add(candidate)

    return words, digit_runs

def extract_database(slip_path):
    """
    Build the list of orders from the packing-slip PDF.

    For every page that yields text:
      * the name is the line after an exact "Ship To:" line; up to four of
        the lines after the name (skipping any that mention "Order ID")
        form the address. If no name was found that way, the line after
        the first one containing "Shipping Address:" is used.
      * every line containing "SKU:" adds an item "(qty) CODE", where CODE
        is the first word after "SKU:" and qty is the number that most
        recently started a line of the form "<digits> <letter>..." (reset
        to "1" after each item).

    Pages without any item are skipped. Each stored order carries the page
    index, the name, the tokens/numbers of name + address, the item list and
    a 'matched' flag initialised to False.
    """
    print("\n--- READING PACKING SLIPS ---")
    pdf = PdfReader(slip_path)
    db = []

    for page_index, page in enumerate(pdf.pages):
        text = page.extract_text()
        if not text:
            continue

        lines = text.split('\n')
        total = len(lines)

        # 1. Parse Name/Address
        name, raw_address = "Unknown", ""
        for pos, raw in enumerate(lines):
            if raw.strip() != "Ship To:" or pos + 1 >= total:
                continue
            name = lines[pos + 1].strip()
            # Address dump: the (up to) four lines following the name
            following = lines[pos + 2:pos + 6]
            raw_address = " ".join(
                part.strip() for part in following if "Order ID" not in part
            )
            break

        # Fallback Name (lines[:-1] guarantees a following line exists)
        if name == "Unknown":
            name = next(
                (
                    lines[pos + 1].strip()
                    for pos, raw in enumerate(lines[:-1])
                    if "Shipping Address:" in raw
                ),
                name,
            )

        # 2. Parse Items
        items = []
        pending_qty = "1"
        for raw in lines:
            stripped = raw.strip()

            # Quantity line, e.g. "2   Ohio State...": digits, space, letter
            leading_count = re.match(r'^(\d+)\s+[A-Za-z]', stripped)
            if leading_count:
                pending_qty = leading_count.group(1)

            if "SKU:" not in stripped:
                continue

            # Keep only the first word after "SKU:"
            sku_code = stripped.split("SKU:")[1].strip().split(" ")[0]
            items.append(f"({pending_qty}) {sku_code}")
            pending_qty = "1"

        if not items:
            continue

        tokens, numbers = get_tokens_and_numbers(name + " " + raw_address)
        db.append({
            'id': page_index,
            'name': name,
            'tokens': tokens,
            'numbers': numbers,
            'skus': items,
            'matched': False
        })

    print(f"Loaded {len(db)} orders from Packing Slips.")
    return db

def perform_ocr(pdf_path, page_num):
    """
    OCR a single page of pdf_path and return the recognised text.

    page_num is zero-based; 1 is added for convert_from_path's first_page and
    last_page arguments. The page is rendered at 300 DPI using POPPLER_BIN
    and read with pytesseract, whose executable is set to TESSERACT_EXE.
    Any exception is printed and "" is returned, as it is when no image was
    produced.
    """
    try:
        # Point pytesseract at the executable located earlier
        pytesseract.pytesseract.tesseract_cmd = TESSERACT_EXE

        render_options = dict(
            first_page=page_num + 1,
            last_page=page_num + 1,
            poppler_path=POPPLER_BIN,
            dpi=300,  # High DPI for better reading
        )
        rendered = convert_from_path(pdf_path, **render_options)

        return pytesseract.image_to_string(rendered[0]) if rendered else ""
    except Exception as e:
        print(f"   [OCR ERROR Page {page_num + 1}]: {e}")
        return ""

def find_match(label_text, db):
    """
    Pick the not-yet-matched order that best fits the text of a label.

    Scoring per order:
      * each number shared with the label: 50 if it has 5+ digits (zip code),
        20 if it has 3-4 digits (street number), otherwise 2;
      * each shared word token: 10.

    The highest-scoring order wins (the earliest one on ties) and is
    returned only if its score reaches 35; otherwise None is returned.
    """
    label_tokens, label_numbers = get_tokens_and_numbers(label_text)

    def number_weight(digits):
        size = len(digits)
        if size >= 5:
            return 50   # Zip code
        if size >= 3:
            return 20   # Street Num
        return 2 if size > 0 else 0  # Tiny numbers

    winner, top_score = None, 0

    for candidate in db:
        if candidate['matched']:
            continue

        shared_numbers = candidate['numbers'].intersection(label_numbers)
        shared_tokens = candidate['tokens'].intersection(label_tokens)

        total = sum(number_weight(n) for n in shared_numbers)
        total += 10 * len(shared_tokens)

        if total > top_score:
            winner, top_score = candidate, total

    # Threshold
    return winner if top_score >= 35 else None

def create_overlay(name, skus, is_error=False):
    """
    Build the overlay that is stamped onto a shipping label.

    A white box with a black border is drawn at (BOX_X, BOX_Y). Its header
    row shows "TO: <name>" (name cut to 40 characters) in black, or `name`
    verbatim in red when is_error is true. Below a separator line one row
    per entry of `skus` is drawn.

    The SKU font is 9pt for up to two rows, 8pt for up to four and 6pt
    beyond that, and is reduced further when the rows would otherwise not
    fit inside the box. If they do not fit even at 6pt, only as many rows as
    fit are drawn and the last one becomes a "+N more" notice, so no text is
    drawn outside the box. Rows longer than 55 characters are shortened to
    55 characters ending in "...".

    Args:
        name: Customer name, or the error headline when is_error is true.
        skus: List of item strings.
        is_error: Draw the text in red and use `name` as the whole header.

    Returns:
        A PdfReader over the in-memory overlay PDF (callers merge its first
        page onto the label page).
    """
    packet = io.BytesIO()
    can = canvas.Canvas(packet, pagesize=(LABEL_WIDTH, LABEL_HEIGHT))

    can.setFillColor("white")
    can.setStrokeColor("black")
    can.rect(BOX_X, BOX_Y, BOX_WIDTH, BOX_HEIGHT, fill=1, stroke=1)

    can.setFillColor("red" if is_error else "black")
    can.setFont("Helvetica-Bold", 8)

    header = f"TO: {name[:40]}" if not is_error else name
    can.drawString(BOX_X + 5, BOX_Y + BOX_HEIGHT - 10, header)

    can.setLineWidth(0.5)
    can.line(BOX_X, BOX_Y + BOX_HEIGHT - 14, BOX_X + BOX_WIDTH, BOX_Y + BOX_HEIGHT - 14)

    rows = list(skus)
    first_baseline = BOX_Y + BOX_HEIGHT - 24
    lowest_baseline = BOX_Y + 2  # keep the last row clear of the bottom border

    def capacity(size):
        # Number of rows of this font size whose baseline stays inside the box.
        return max(1, int((first_baseline - lowest_baseline) // (size + 2)) + 1)

    # Preferred size by row count, then the largest size that actually fits.
    preferred = 9
    if len(rows) > 2:
        preferred = 8
    if len(rows) > 4:
        preferred = 6
    font_size = next(
        (size for size in (9, 8, 6) if size <= preferred and capacity(size) >= len(rows)),
        6,
    )
    can.setFont("Helvetica-Bold", font_size)

    # Too many rows even for the smallest font: show what fits plus a notice.
    room = capacity(font_size)
    if len(rows) > room:
        hidden = len(rows) - (room - 1)
        rows = rows[:room - 1] + [f"+{hidden} more - see packing slip"]

    text_y = first_baseline
    for s in rows:
        if len(s) > 55:
            s = s[:52] + "..."  # 52 + 3 keeps the row at 55 characters
        can.drawString(BOX_X + 5, text_y, s)
        text_y -= (font_size + 2)

    can.save()
    packet.seek(0)
    return PdfReader(packet)

# --- MAIN ---
def main():
    """
    Interactive entry point of the Amazon label matcher.

    Steps:
      1. Locate Tesseract and Poppler; abort with an error dialog if either
         is missing.
      2. Ask for the packing-slip file and the shipping-label file.
      3. Load the orders from the packing slips.
      4. For every label page, read its text (falling back to OCR when the
         page has fewer than 10 characters of text), find the matching
         order and stamp the page with that order's overlay, or with a red
         "NO MATCH FOUND" overlay when nothing matches.
      5. Ask where to save the result and report the match/error counts.

    Any exception raised during steps 3-5 is printed and shown in an error
    dialog.
    """
    root = tk.Tk()
    root.withdraw()

    # 1. CHECK TOOLS
    tools_ok = find_ocr_tools()
    if not tools_ok:
        messagebox.showerror("Missing Tools", "The script could not find Tesseract or Poppler.\nPlease check the console for instructions.")
        return

    # 2. SELECT FILES
    slip_path = filedialog.askopenfilename(title="1. Select PACKING SLIPS")
    if not slip_path:
        return

    label_path = filedialog.askopenfilename(title="2. Select SHIPPING LABELS")
    if not label_path:
        return

    try:
        # 3. LOAD DB
        order_db = extract_database(slip_path)

        # 4. PROCESS
        label_reader = PdfReader(label_path)
        writer = PdfWriter()

        print("\n--- MATCHING LABELS ---")
        matches = 0
        errors = 0

        for i, page in enumerate(label_reader.pages):
            # Try basic extract
            text = ""
            try:
                text = page.extract_text()
            except Exception as e:
                # A page whose text layer cannot be read is handled like an
                # image-only page (OCR below). Ctrl+C and SystemExit are
                # deliberately not swallowed here.
                print(f"   Page {i+1}: text extraction failed ({e}); falling back to OCR.")
                text = ""

            # If text is empty/short, USE OCR
            if not text or len(text.strip()) < 10:
                print(f"   Page {i+1} is an image. Scanning...")
                text = perform_ocr(label_path, i)
                # Clean OCR result slightly to help matching
                text = clean_ocr_text(text)

            match = find_match(text, order_db)

            if match:
                # Mark the order as used so it cannot be assigned twice.
                match['matched'] = True
                ov = create_overlay(match['name'], match['skus'])
                page.merge_page(ov.pages[0])
                matches += 1
                print(f"   Page {i+1}: MATCH -> {match['name']}")
            else:
                ov = create_overlay("NO MATCH FOUND", ["Check Manually"], is_error=True)
                page.merge_page(ov.pages[0])
                errors += 1
                print(f"   Page {i+1}: FAIL")

            writer.add_page(page)

        print(f"\nDONE. Matches: {matches}, Errors: {errors}")
        save_file = filedialog.asksaveasfilename(title="Save Final PDF", initialfile="Amazon_Labels_Complete.pdf", defaultextension=".pdf")
        if save_file:
            with open(save_file, "wb") as f:
                writer.write(f)
            messagebox.showinfo("Success", f"Finished!\nMatches: {matches}\nErrors: {errors}")

    except Exception as e:
        print(f"CRITICAL ERROR: {e}")
        messagebox.showerror("Error", str(e))

if __name__ == "__main__":
    main()
Previous
Previous

Resume

Next
Next

HMI Automation DEMO