# ETSY LABEL AUTOMATION CODE
import sys
import re
import os
import tkinter as tk
from tkinter import filedialog, messagebox
import io
from pypdf import PdfReader, PdfWriter
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter
# --- CONFIGURATION ---
# Label page size in PDF points (72 points per inch).
LABEL_WIDTH = 4 * 72   # 288 pt = 4 inches
LABEL_HEIGHT = 6 * 72  # 432 pt = 6 inches

# --- LAYOUT SETTINGS (V5 Adjusted) ---
# The overlay box is anchored at the bottom-left corner of the label.
# Its height is held to 42 pt so it stays below the tracking numbers,
# and its width to 200 pt so it does not reach the QR code.
BOX_X, BOX_Y = 3, 3
BOX_WIDTH, BOX_HEIGHT = 200, 42
# Quantity as printed on a slip: "<qty> x $<price>" or a line ending in
# "<qty> x".  Requiring whitespace before the "x" keeps dimension text such
# as "12x18" from being mistaken for a quantity.
_QTY_PATTERN = re.compile(r'(\d+)\s+x\s*(?:\$|$)')


def _find_customer_name(lines):
    """Return the line that follows the first "Ship to" line, else "Unknown"."""
    for idx, line in enumerate(lines):
        if "Ship to" in line:
            # The name is usually on the very next line
            if idx + 1 < len(lines):
                return lines[idx + 1].strip()
            break
    return "Unknown"


def _parse_items(text, lines):
    """Return "(qty) SKU" strings for one packing-slip page.

    A SKU is paired with the first quantity line seen at or after it.  A SKU
    that is superseded by another SKU (or by the end of the page) before any
    quantity line shows up is kept with quantity 1 instead of being dropped.
    """
    items = []
    pending_sku = None
    saw_quantity_line = False

    for line in lines:
        if "SKU:" in line:
            if pending_sku:
                # Previous SKU never got a quantity line; do not lose it.
                items.append(f"(1) {pending_sku}")
            pending_sku = line.replace("SKU:", "").strip()

        stripped = line.strip()
        # Look for Quantity (e.g., "1 x $47.98")
        if pending_sku and (" x $" in line or stripped.endswith(" x")):
            qty_match = _QTY_PATTERN.search(stripped)
            qty = qty_match.group(1) if qty_match else "1"
            items.append(f"({qty}) {pending_sku}")
            pending_sku = None
            saw_quantity_line = True

    if pending_sku:
        items.append(f"(1) {pending_sku}")

    if not saw_quantity_line:
        # No quantity line parsed on this page: use the whole-page scan and
        # list every SKU with quantity 1.
        return [f"(1) {sku}" for sku in re.findall(r'SKU:\s*(.*)', text)]
    return items


def extract_order_data(packing_slip_path):
    """
    Reads the Packing Slip PDF and extracts, per page:
    1. Customer Name (for safety check)
    2. SKUs and Quantities

    Args:
        packing_slip_path: Path of the packing-slip PDF; each page is treated
            as one order.

    Returns:
        A list with one dict per page, in page order:
        {'name': <str>, 'items': [<"(qty) SKU" str>, ...]}.
        A page with no extractable text yields name "Unknown" and no items,
        so the list length always equals the page count.
    """
    reader = PdfReader(packing_slip_path)
    print(f"Reading {len(reader.pages)} packing slips...")

    orders = []
    for page in reader.pages:
        # Guard against a None/empty extraction result so a blank page does
        # not abort the whole run.
        text = page.extract_text() or ""
        lines = text.split('\n')
        orders.append({
            'name': _find_customer_name(lines),
            'items': _parse_items(text, lines),
        })
    return orders
def create_overlay(order_data):
    """
    Build a one-page overlay PDF holding the info box for a single label.

    The box shows "Order: <name>" (name cut to 35 characters), a separator
    line, and one line per entry of order_data['items'].

    Args:
        order_data: dict with 'name' (str) and 'items' (list of str).

    Returns:
        A PdfReader over the in-memory overlay; its first page is the overlay.
    """
    buffer = io.BytesIO()
    pdf = canvas.Canvas(buffer, pagesize=(LABEL_WIDTH, LABEL_HEIGHT))

    # White box with a thin black border
    pdf.setFillColor("white")
    pdf.setStrokeColor("black")
    pdf.rect(BOX_X, BOX_Y, BOX_WIDTH, BOX_HEIGHT, fill=1, stroke=1)

    # Header: customer name, sitting just under the top edge of the box
    pdf.setFillColor("black")
    header_pt = 7
    pdf.setFont("Helvetica-Bold", header_pt)
    header_baseline = BOX_Y + BOX_HEIGHT - header_pt - 3
    customer = order_data['name'][:35]  # Limit name length
    pdf.drawString(BOX_X + 4, header_baseline, f"Order: {customer}")

    # Separator between header and item list
    rule_y = header_baseline - 3
    pdf.setLineWidth(0.5)
    pdf.line(BOX_X, rule_y, BOX_X + BOX_WIDTH, rule_y)

    # Item font: aim for 9pt, shrink to fit the space between the separator
    # and the box bottom, but never go below 5pt.
    entries = order_data['items']
    room = rule_y - BOX_Y - 2
    if entries:
        item_pt = max(5, min(9, room / (len(entries) * 1.05)))
    else:
        item_pt = 9
    pdf.setFont("Helvetica-Bold", item_pt)

    # Same for every line: vertical step and the approximate number of
    # characters that fit (char width estimated as size * 0.55).
    line_step = item_pt * 1.05
    char_limit = int((BOX_WIDTH - 6) / (item_pt * 0.55))

    cursor_y = rule_y - item_pt - 2
    for entry in entries:
        shown = entry if len(entry) <= char_limit else entry[:char_limit - 2] + ".."
        pdf.drawString(BOX_X + 4, cursor_y, shown)
        cursor_y -= line_step

    pdf.save()
    buffer.seek(0)
    return PdfReader(buffer)
def select_file(title, filetypes):
    """Show an "open file" dialog and return its result.

    A hidden Tk root is created first so no empty main window appears.

    Args:
        title: Dialog window title.
        filetypes: Sequence of (label, pattern) pairs offered as filters.

    Returns:
        Whatever filedialog.askopenfilename returns.
    """
    hidden_root = tk.Tk()
    hidden_root.withdraw()
    chosen_path = filedialog.askopenfilename(title=title, filetypes=filetypes)
    return chosen_path
def save_file(title, default_name):
    """Show a "save as" dialog restricted to PDF files and return its result.

    A hidden Tk root is created first so no empty main window appears.

    Args:
        title: Dialog window title.
        default_name: File name pre-filled in the dialog.

    Returns:
        Whatever filedialog.asksaveasfilename returns.
    """
    hidden_root = tk.Tk()
    hidden_root.withdraw()
    dialog_options = {
        "title": title,
        "initialfile": default_name,
        "defaultextension": ".pdf",
        "filetypes": [("PDF Files", "*.pdf")],
    }
    return filedialog.asksaveasfilename(**dialog_options)
def main():
    """Interactive entry point for the Etsy flow.

    Asks for the packing-slip PDF and the shipping-label PDF, stamps the
    N-th slip's order box onto the N-th label, and saves the merged labels.
    Pairing is purely positional, so a page-count mismatch triggers a
    confirmation prompt and only the first min(slips, labels) pages are used.
    """
    print("--- SEQUENTIAL LABEL PACKER V5 (Compact Layout) ---")

    slip_path = select_file("1. Select PACKING SLIPS", [("PDF Files", "*.pdf")])
    if not slip_path:
        return

    label_path = select_file("2. Select SHIPPING LABELS", [("PDF Files", "*.pdf")])
    if not label_path:
        return

    try:
        # 1. Get Data
        orders = extract_order_data(slip_path)
        label_reader = PdfReader(label_path)
        writer = PdfWriter()

        # 2. Validation
        slip_count, label_count = len(orders), len(label_reader.pages)
        if slip_count != label_count:
            warning = (
                f"Count Mismatch!\nSlips: {slip_count}\nLabels: {label_count}\n\n"
                "The script requires a 1-to-1 match. Proceed with caution?"
            )
            if not messagebox.askyesno("Warning", warning):
                return

        # 3. Process
        print("Merging labels sequentially...")
        count = min(slip_count, label_count)
        # zip stops at the shorter sequence, i.e. after `count` pairs.
        for order_data, label_page in zip(orders, label_reader.pages):
            overlay_pdf = create_overlay(order_data)
            label_page.merge_page(overlay_pdf.pages[0])
            writer.add_page(label_page)

        # 4. Save
        output_path = save_file("3. Save Ready-to-Print Labels", "Final_Labels_V5")
        if output_path:
            with open(output_path, "wb") as out_file:
                writer.write(out_file)
            messagebox.showinfo("Success", f"Created {count} labels.\n\nSAFETY CHECK:\nAlways verify the 'Order:' name matches the Ship To name.")
    except Exception as e:
        # Top-level boundary: report any failure to the user and the console.
        messagebox.showerror("Error", f"An error occurred:\n{str(e)}")
        print(f"Critical Error: {e}")


if __name__ == "__main__":
    main()
# AMAZON LABEL AUTOMATION CODE
import sys
import re
import os
import tkinter as tk
from tkinter import filedialog, messagebox
import io
import subprocess
# --- 1. CONFIGURATION & AUTO-DISCOVERY ---
# Overlay page size handed to the ReportLab canvas.
LABEL_WIDTH, LABEL_HEIGHT = 288, 432

# Rectangle of the info box drawn on each label: origin (x, y), then size.
BOX_X, BOX_Y = 5, 5
BOX_WIDTH, BOX_HEIGHT = 278, 50
# Global paths
# Both are filled in by find_ocr_tools(); None means "not located".
TESSERACT_EXE = None
POPPLER_BIN = None


def find_ocr_tools():
    """
    Hunts for Tesseract and Poppler on the C: drive to fix pathing errors.

    Sets the module globals TESSERACT_EXE (path of tesseract.exe) and
    POPPLER_BIN (Poppler's "Library\\bin" folder) when they are found, and
    prints a diagnostic line for each tool.

    Returns:
        True only if both tools have a known location, otherwise False.
    """
    global TESSERACT_EXE, POPPLER_BIN
    print("--- DIAGNOSTIC: FINDING OCR TOOLS ---")

    # 1. Look for Tesseract in the usual install locations
    possible_tess = [
        r"C:\Program Files\Tesseract-OCR\tesseract.exe",
        r"C:\Program Files (x86)\Tesseract-OCR\tesseract.exe",
        r"C:\Tesseract-OCR\tesseract.exe",
    ]
    for p in possible_tess:
        if os.path.exists(p):
            TESSERACT_EXE = p
            print(f"✅ FOUND TESSERACT: {p}")
            break
    if not TESSERACT_EXE:
        print("❌ CRITICAL: Tesseract EXE not found.")
        print(" -> Install from: https://github.com/UB-Mannheim/tesseract/wiki")

    # 2. Look for Poppler
    # Check standard path first
    if os.path.exists(r"C:\poppler-24.08.0\Library\bin"):
        POPPLER_BIN = r"C:\poppler-24.08.0\Library\bin"
    else:
        # Search C root for any folder whose name contains 'poppler'
        try:
            root_dirs = os.listdir("C:\\")
        except OSError:
            # Drive missing or unreadable: treat as "nothing to search".
            # Only OSError is caught so Ctrl-C and real bugs still surface.
            root_dirs = []
        for d in root_dirs:
            if "poppler" in d.lower():
                candidate = os.path.join("C:\\", d, "Library", "bin")
                if os.path.exists(candidate):
                    POPPLER_BIN = candidate
                    break

    if POPPLER_BIN:
        print(f"✅ FOUND POPPLER: {POPPLER_BIN}")
    else:
        print("❌ CRITICAL: Poppler bin folder not found.")
        print(" -> Ensure you extracted the zip to C:\\ and the folder name contains 'poppler'.")

    return (TESSERACT_EXE is not None) and (POPPLER_BIN is not None)
# --- IMPORTS THAT MIGHT FAIL ---
# The third-party packages are imported together so that a missing one
# produces a single actionable message instead of a traceback.
try:
    from pypdf import PdfReader, PdfWriter
    from reportlab.pdfgen import canvas
    from reportlab.lib.pagesizes import letter
    import pytesseract
    from pdf2image import convert_from_path
except ImportError as e:
    print(f"\n❌ LIBRARY ERROR: {e}")
    print(" -> Run this command in PowerShell: & C:/ProgramData/miniconda3/python.exe -m pip install pytesseract pdf2image pypdf reportlab")
    # Non-zero status: a bare sys.exit() would report success (0) to the shell.
    sys.exit(1)
# --- PROCESSING LOGIC ---
def clean_ocr_text(text):
    """Normalize OCR output for matching by upper-casing it.

    No character substitution is performed: a blanket O->0 / I->1 swap would
    also rewrite letters inside names (e.g. "OHIO" -> "0H10").

    Args:
        text: Raw OCR text; may be None or empty.

    Returns:
        The upper-cased text, or "" when text is falsy.
    """
    if not text:
        return ""
    return text.upper()
def get_tokens_and_numbers(text):
    """Split address-like text into comparable word tokens and digit runs.

    The text is upper-cased, and newlines, hyphens and periods are treated
    as spaces before tokenizing.

    Args:
        text: Any text; falsy input yields two empty sets.

    Returns:
        (tokens, numbers) where
        - numbers is the set of all maximal digit runs, and
        - tokens is the set of A-Z/0-9 runs longer than two characters that
          are not shipping-label boilerplate words.
    """
    if not text:
        return set(), set()

    # One pass instead of three chained replace() calls.
    separators = str.maketrans({'\n': ' ', '-': ' ', '.': ' '})
    normalized = text.upper().translate(separators)

    digit_runs = set(re.findall(r'\d+', normalized))

    boilerplate = {'SHIP', 'TO', 'USPS', 'GROUND', 'ADVANTAGE', 'POSTAGE', 'PAID', 'WEIGHT', 'OZ', 'LB', 'ZONE', 'COMMERCIAL', 'BASE', 'PRIORITY', 'MAIL'}
    words = {
        word
        for word in re.findall(r'[A-Z0-9]+', normalized)
        if len(word) > 2 and word not in boilerplate
    }
    return words, digit_runs
def extract_database(slip_path):
    """Parse the packing-slip PDF into a list of matchable order records.

    For every page with extractable text this reads the recipient name and
    address block (after a "Ship To:" line, or the name after a
    "Shipping Address:" line as fallback) and the ordered SKUs.  Pages
    without any SKU are left out.

    Args:
        slip_path: Path of the packing-slip PDF.

    Returns:
        A list of dicts with keys 'id' (page index), 'name', 'tokens',
        'numbers' (both from get_tokens_and_numbers over name + address),
        'skus' (list of "(qty) SKU" strings) and 'matched' (initially False).
    """
    print("\n--- READING PACKING SLIPS ---")
    reader = PdfReader(slip_path)
    records = []

    for page_index, page in enumerate(reader.pages):
        page_text = page.extract_text()
        if not page_text:
            continue
        lines = page_text.split('\n')
        total = len(lines)

        # 1. Parse Name/Address
        name = "Unknown"
        raw_address = ""
        for pos, raw_line in enumerate(lines):
            if raw_line.strip() != "Ship To:" or pos + 1 >= total:
                continue
            name = lines[pos + 1].strip()
            # Address dump: up to four lines after the name, skipping any
            # line that mentions "Order ID".
            raw_address = " ".join(
                candidate.strip()
                for candidate in lines[pos + 2:pos + 6]
                if "Order ID" not in candidate
            )
            break

        # Fallback Name
        if name == "Unknown":
            name = next(
                (
                    lines[pos + 1].strip()
                    for pos, raw_line in enumerate(lines)
                    if "Shipping Address:" in raw_line and pos + 1 < total
                ),
                name,
            )

        # 2. Parse Items
        # A line that starts with "<digits> <letter>" (e.g. "2 Ohio State...")
        # sets the quantity used by the next "SKU:" line; after each SKU the
        # quantity resets to "1".
        skus = []
        pending_qty = "1"
        for raw_line in lines:
            stripped = raw_line.strip()
            leading_qty = re.match(r'^(\d+)\s+[A-Za-z]', stripped)
            if leading_qty:
                pending_qty = leading_qty.group(1)
            if "SKU:" not in stripped:
                continue
            # Keep only the first word after the first "SKU:" marker.
            after_marker = stripped.split("SKU:")[1]
            sku_code = after_marker.strip().split(" ")[0]
            skus.append(f"({pending_qty}) {sku_code}")
            pending_qty = "1"

        if not skus:
            continue

        tokens, numbers = get_tokens_and_numbers(name + " " + raw_address)
        records.append({
            'id': page_index,
            'name': name,
            'tokens': tokens,
            'numbers': numbers,
            'skus': skus,
            'matched': False,
        })

    print(f"Loaded {len(records)} orders from Packing Slips.")
    return records
def perform_ocr(pdf_path, page_num):
    """
    Render one PDF page to an image and run Tesseract on it.

    Uses the TESSERACT_EXE and POPPLER_BIN locations found earlier.

    Args:
        pdf_path: Path of the PDF to read.
        page_num: Zero-based page index (converted to the 1-based page
            number passed to convert_from_path).

    Returns:
        The recognized text, or "" if rendering produced no image or any
        error occurred (the error is printed, not raised).
    """
    try:
        # Point pytesseract at the executable located earlier
        pytesseract.pytesseract.tesseract_cmd = TESSERACT_EXE

        one_based = page_num + 1
        rendered = convert_from_path(
            pdf_path,
            first_page=one_based,
            last_page=one_based,
            poppler_path=POPPLER_BIN,
            dpi=300,  # High DPI for better reading
        )
        if rendered:
            return pytesseract.image_to_string(rendered[0])
        return ""
    except Exception as e:
        print(f" [OCR ERROR Page {page_num+1}]: {e}")
        return ""
def find_match(label_text, db):
    """Pick the unmatched order whose address best overlaps the label text.

    Scoring per order:
    - each number shared with the label: 50 if it has 5+ digits (zip-like),
      20 if 3-4 digits (street-number-like), 2 otherwise;
    - each shared text token: 10.

    Args:
        label_text: Text read from the shipping label.
        db: Order records carrying 'matched', 'numbers' and 'tokens'.

    Returns:
        The first order reaching the highest score, provided that score is
        at least 35; otherwise None.
    """
    label_tokens, label_numbers = get_tokens_and_numbers(label_text)

    def number_weight(digits):
        size = len(digits)
        if size >= 5:
            return 50  # Zip code
        if size >= 3:
            return 20  # Street Num
        return 2 if size > 0 else 0  # Tiny numbers

    winner = None
    top_score = 0
    for candidate in db:
        if candidate['matched']:
            continue

        shared_numbers = candidate['numbers'] & label_numbers
        shared_tokens = candidate['tokens'] & label_tokens
        points = sum(number_weight(n) for n in shared_numbers) + 10 * len(shared_tokens)

        # Strict comparison: on a tie the earlier order keeps the lead.
        if points > top_score:
            top_score, winner = points, candidate

    return winner if top_score >= 35 else None  # Threshold
def create_overlay(name, skus, is_error=False):
    """Build a one-page overlay PDF with the recipient header and SKU lines.

    Args:
        name: Header text; shown as "TO: <first 40 chars>" unless is_error.
        skus: Lines to print under the header (each cut to 53 characters
            plus "..." when longer than 55).
        is_error: When True the text is drawn in red and `name` is used as
            the header verbatim.

    Returns:
        A PdfReader over the in-memory overlay; its first page is the overlay.
    """
    buffer = io.BytesIO()
    pdf = canvas.Canvas(buffer, pagesize=(LABEL_WIDTH, LABEL_HEIGHT))

    # Background box
    pdf.setFillColor("white")
    pdf.setStrokeColor("black")
    pdf.rect(BOX_X, BOX_Y, BOX_WIDTH, BOX_HEIGHT, fill=1, stroke=1)

    # Header line
    box_top = BOX_Y + BOX_HEIGHT
    if is_error:
        pdf.setFillColor("red")
        header = name
    else:
        pdf.setFillColor("black")
        header = f"TO: {name[:40]}"
    pdf.setFont("Helvetica-Bold", 8)
    pdf.drawString(BOX_X + 5, box_top - 10, header)

    # Separator under the header
    pdf.setLineWidth(0.5)
    pdf.line(BOX_X, box_top - 14, BOX_X + BOX_WIDTH, box_top - 14)

    # Smaller type as the SKU list grows
    sku_count = len(skus)
    if sku_count > 4:
        font_size = 6
    elif sku_count > 2:
        font_size = 8
    else:
        font_size = 9
    pdf.setFont("Helvetica-Bold", font_size)

    cursor_y = box_top - 24
    for entry in skus:
        shown = entry[:53] + "..." if len(entry) > 55 else entry
        pdf.drawString(BOX_X + 5, cursor_y, shown)
        cursor_y -= (font_size + 2)

    pdf.save()
    buffer.seek(0)
    return PdfReader(buffer)
# --- MAIN ---
def main():
    """Interactive entry point for the Amazon flow.

    Checks that the OCR tools are available, asks for the packing-slip and
    shipping-label PDFs, matches every label page to an order by address
    overlap (using OCR when the page has no usable text layer), stamps the
    order box or a red "NO MATCH FOUND" box onto the page, and saves the
    result.
    """
    root = tk.Tk()
    root.withdraw()

    # 1. CHECK TOOLS
    tools_ok = find_ocr_tools()
    if not tools_ok:
        messagebox.showerror("Missing Tools", "The script could not find Tesseract or Poppler.\nPlease check the console for instructions.")
        return

    # 2. SELECT FILES
    slip_path = filedialog.askopenfilename(title="1. Select PACKING SLIPS")
    if not slip_path:
        return
    label_path = filedialog.askopenfilename(title="2. Select SHIPPING LABELS")
    if not label_path:
        return

    try:
        # 3. LOAD DB
        order_db = extract_database(slip_path)

        # 4. PROCESS
        label_reader = PdfReader(label_path)
        writer = PdfWriter()
        print("\n--- MATCHING LABELS ---")
        matches = 0
        errors = 0

        for i, page in enumerate(label_reader.pages):
            # Try basic extract.  A failure here is not fatal because OCR is
            # the fallback, but only ordinary errors are absorbed so Ctrl-C
            # and interpreter exit are not swallowed.
            text = ""
            try:
                text = page.extract_text()
            except Exception as extract_error:
                print(f" Page {i+1}: text extraction failed ({extract_error}); falling back to OCR.")

            # If text is empty/short, USE OCR
            if not text or len(text.strip()) < 10:
                print(f" Page {i+1} is an image. Scanning...")
                text = perform_ocr(label_path, i)
                # Clean OCR result slightly to help matching
                text = clean_ocr_text(text)

            match = find_match(text, order_db)
            if match:
                # Mark as used so the same order is not assigned twice.
                match['matched'] = True
                ov = create_overlay(match['name'], match['skus'])
                page.merge_page(ov.pages[0])
                matches += 1
                print(f" Page {i+1}: MATCH -> {match['name']}")
            else:
                ov = create_overlay("NO MATCH FOUND", ["Check Manually"], is_error=True)
                page.merge_page(ov.pages[0])
                errors += 1
                print(f" Page {i+1}: FAIL")
            writer.add_page(page)

        print(f"\nDONE. Matches: {matches}, Errors: {errors}")

        output_path = filedialog.asksaveasfilename(title="Save Final PDF", initialfile="Amazon_Labels_Complete.pdf", defaultextension=".pdf")
        if output_path:
            with open(output_path, "wb") as f:
                writer.write(f)
            messagebox.showinfo("Success", f"Finished!\nMatches: {matches}\nErrors: {errors}")
    except Exception as e:
        # Top-level boundary: report any failure to the console and the user.
        print(f"CRITICAL ERROR: {e}")
        messagebox.showerror("Error", str(e))


if __name__ == "__main__":
    main()