Drawing Analyzer for Construction
Overview
Analyze construction drawings (PDF, DWG) to extract dimensions, annotations, symbols, title block data, and support automated quantity takeoff and design review.
Business Case
Drawing analysis automation enables:
- Faster Takeoffs: Extract quantities from drawings
- Quality Control: Verify drawing completeness
- Data Extraction: Pull metadata for project systems
- Design Review: Automated checking against standards
Technical Implementation
import re
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

import pdfplumber
@dataclass
class TitleBlockData:
    """Text fields describing a drawing sheet's title block.

    Every field is stored as a plain string exactly as supplied by the
    caller; no parsing or validation happens in this class.
    """

    project_name: str
    project_number: str
    sheet_number: str
    sheet_title: str
    discipline: str
    scale: str  # scale as written on the sheet (text, not a numeric factor)
    date: str
    revision: str
    drawn_by: str
    checked_by: str
    approved_by: str
@dataclass
class Dimension:
    """A single dimension value found on a drawing."""

    value: float  # numeric magnitude, expressed in ``unit``
    unit: str
    dimension_type: str  # linear, angular, radial
    location: Tuple[float, float]  # position on the sheet; coordinate system not defined here
    associated_text: str  # the text the value was read from
@dataclass
class Annotation:
    """A piece of annotation text found on a drawing."""

    text: str
    annotation_type: str  # note, callout, tag, keynote
    location: Tuple[float, float]  # position on the sheet; coordinate system not defined here
    references: List[str]  # identifiers this annotation refers to
@dataclass
class Symbol:
    """A tagged drawing element such as a door, window or piece of equipment."""

    symbol_type: str  # door, window, equipment, etc.
    tag: str
    location: Tuple[float, float]  # position on the sheet; coordinate system not defined here
    properties: Dict[str, Any]  # free-form attributes attached to the element
@dataclass
class DrawingAnalysisResult:
    """Everything extracted from one drawing file.

    The element types are written as forward references so this class does
    not depend on where the other record classes are defined.
    """

    file_name: str
    title_block: Optional["TitleBlockData"]  # None when no title block is available
    dimensions: List["Dimension"]
    annotations: List["Annotation"]
    symbols: List["Symbol"]
    scale_factor: float
    drawing_area: Tuple[float, float]
    quality_issues: List[str]  # human-readable descriptions of detected problems
class DrawingAnalyzer:
    """Analyze construction drawings for data extraction.

    Text and tables are read from PDF pages with pdfplumber and matched
    against the regular expressions held in the class-level tables below.
    Each ``analyze_pdf_drawing`` result is also stored in ``self.results``
    under the file's base name.
    """

    # Common dimension patterns
    DIMENSION_PATTERNS = [
        r"(\d+'\s*-\s*\d+(?:\s*\d+/\d+)?\"?)",  # Feet-inches: 10'-6", 10' - 6 1/2"
        r"(\d+(?:\.\d+)?)\s*(?:mm|cm|m|ft|in)",  # Metric/imperial with unit
        r"(\d+'-\d+\")",  # Compact feet-inches
        r"(\d+)\s*(?:SF|LF|CY|EA)",  # Quantity dimensions
    ]

    # Common annotation patterns
    ANNOTATION_PATTERNS = {
        'keynote': r'^\d{1,2}[A-Z]?$',  # 1A, 12, 5B
        'room_tag': r'^(?:RM|ROOM)\s*\d+',
        'door_tag': r'^[A-Z]?\d{2,3}[A-Z]?$',
        'grid_line': r'^[A-Z]$|^\d+$',
        'elevation': r'^(?:EL|ELEV)\.?\s*\d+',
        'detail_ref': r'^\d+/[A-Z]\d+',
    }

    # Line prefixes that mark a general note (compared case-insensitively)
    NOTE_PREFIXES = ('NOTE:', 'SEE ', 'REFER TO', 'TYP', 'U.N.O.')

    # Scale patterns, tried in order. A pattern without capture groups is a
    # "not to scale" marker; the others capture (drawn, actual).
    SCALE_PATTERNS = [
        r"SCALE:\s*(\d+(?:/\d+)?)\s*[\"']\s*=\s*(\d+)\s*['\-]",  # 1/4" = 1'-0"
        r"(\d+):(\d+)",  # 1:100
        r"\bNTS\b|\bNOT\s+TO\s+SCALE\b",
    ]

    # Common title block patterns
    TITLE_BLOCK_PATTERNS = {
        'project_name': r'PROJECT(?:\s*NAME)?:\s*(.+?)(?:\n|$)',
        'project_number': r'(?:PROJECT\s*)?(?:NO|NUMBER|#)\.?:\s*(\S+)',
        'sheet_number': r'SHEET(?:\s*NO)?\.?:\s*([A-Z]{0,2}-?\d+(?:\.\d+)?)',
        'sheet_title': r'SHEET\s*TITLE:\s*(.+?)(?:\n|$)',
        'scale': r'SCALE:\s*(.+?)(?:\n|$)',
        'date': r'DATE:\s*(\d{1,2}[/-]\d{1,2}[/-]\d{2,4})',
        'revision': r'REV(?:ISION)?\.?:\s*(\S+)',
        'drawn_by': r'(?:DRAWN|DRN)\s*(?:BY)?:\s*(\S+)',
        'checked_by': r'(?:CHECKED|CHK)\s*(?:BY)?:\s*(\S+)',
        'approved_by': r'\b(?:APPROVED|APPR|APP)\s*(?:BY)?:\s*(\S+)',
    }

    # First letter of the sheet number -> discipline name
    DISCIPLINE_BY_PREFIX = {
        'A': 'Architectural', 'S': 'Structural', 'M': 'Mechanical',
        'E': 'Electrical', 'P': 'Plumbing', 'C': 'Civil',
        'L': 'Landscape', 'I': 'Interior', 'F': 'Fire Protection',
    }

    # Whole-word header names that identify the tag column of a schedule.
    # Lookarounds (rather than substring tests) keep "notes" and "remarks"
    # from being mistaken for "no" and "mark".
    TAG_HEADER_PATTERN = r"(?<![a-z])(?:tags?|marks?|no|number|num)(?![a-z])|#"

    def __init__(self):
        # Results of every analysis so far, keyed by file base name.
        self.results: Dict[str, "DrawingAnalysisResult"] = {}

    def analyze_pdf_drawing(self, pdf_path: str) -> "DrawingAnalysisResult":
        """Analyze a PDF drawing.

        Args:
            pdf_path: Path of the PDF file to open with pdfplumber.

        Returns:
            The analysis result. It is also stored in ``self.results`` under
            the file's base name, so a later file with the same base name
            replaces it.
        """
        path = Path(pdf_path)
        page_texts: List[str] = []
        dimensions = []
        annotations = []
        symbols = []

        with pdfplumber.open(pdf_path) as pdf:
            for page in pdf.pages:
                # Extract text (treated as empty when the page yields none)
                text = page.extract_text() or ""
                page_texts.append(text)

                dimensions.extend(self._extract_dimensions(text))
                annotations.extend(self._extract_annotations(text))

                # Extract from tables (often contain schedules)
                for table in page.extract_tables():
                    symbols.extend(self._parse_schedule_table(table))

        # Title block and scale are searched across all pages at once.
        all_text = "".join(f"{text}\n" for text in page_texts)
        title_block = self._extract_title_block(all_text)
        scale_factor = self._determine_scale(all_text)
        quality_issues = self._check_drawing_quality(
            title_block, dimensions, annotations
        )

        result = DrawingAnalysisResult(
            file_name=path.name,
            title_block=title_block,
            dimensions=dimensions,
            annotations=annotations,
            symbols=symbols,
            scale_factor=scale_factor,
            drawing_area=(0, 0),  # Would need image analysis
            quality_issues=quality_issues,
        )
        self.results[path.name] = result
        return result

    def _extract_dimensions(self, text: str) -> List["Dimension"]:
        """Extract dimensions from text.

        Every dimension is reported as type ``'linear'`` at location
        ``(0, 0)``; plain text carries no geometry.
        """
        return [
            Dimension(
                value=value,
                unit=unit,
                dimension_type='linear',
                location=(0, 0),
                associated_text=raw,
            )
            for value, unit, raw in self._scan_dimensions(text)
        ]

    def _scan_dimensions(self, text: str) -> List[Tuple[float, str, str]]:
        """Return ``(value, unit, matched_text)`` for each dimension in *text*.

        Patterns are applied in ``DIMENSION_PATTERNS`` order. A match that
        overlaps an already accepted one is skipped, so a string such as
        ``10'-6"`` that satisfies two patterns is reported once. The full
        match (including any unit suffix) is parsed so the unit is kept.
        """
        found: List[Tuple[float, str, str]] = []
        claimed: List[Tuple[int, int]] = []
        for pattern in self.DIMENSION_PATTERNS:
            for match in re.finditer(pattern, text):
                start, end = match.span()
                if any(start < c_end and c_start < end for c_start, c_end in claimed):
                    continue
                raw = match.group(0).strip()
                value, unit = self._parse_dimension_value(raw)
                if value > 0:
                    claimed.append((start, end))
                    found.append((value, unit, raw))
        return found

    def _parse_dimension_value(self, dim_text: str) -> Tuple[float, str]:
        """Parse dimension text to value and unit.

        Returns:
            ``(total_inches, 'in')`` for feet-inches text such as ``10'-6"``,
            ``(number, unit)`` for a number followed by a known unit,
            ``(number, '')`` for a bare number, and ``(0.0, '')`` when the
            text does not start with a number.
        """
        dim_text = dim_text.strip()

        # Feet and inches: 10'-6"
        ft_in_match = re.match(r"(\d+)'[-\s]*(\d+)?(?:\s*(\d+)/(\d+))?\"?", dim_text)
        if ft_in_match:
            feet, inches, frac_num, frac_den = ft_in_match.groups()
            total = int(feet) * 12.0 + int(inches or 0)
            # A zero denominator is malformed text; ignore the fraction
            # instead of raising ZeroDivisionError.
            if frac_num and frac_den and int(frac_den) != 0:
                total += int(frac_num) / int(frac_den)
            return total, 'in'

        # Number with a unit (same units as DIMENSION_PATTERNS)
        unit_match = re.match(
            r"(\d+(?:\.\d+)?)\s*(mm|cm|m|ft|in|SF|LF|CY|EA)", dim_text
        )
        if unit_match:
            return float(unit_match.group(1)), unit_match.group(2)

        # Just a number
        num_match = re.match(r"(\d+(?:\.\d+)?)", dim_text)
        if num_match:
            return float(num_match.group(1)), ''

        return 0.0, ''

    def _extract_annotations(self, text: str) -> List["Annotation"]:
        """Extract annotations from text, one candidate per non-blank line."""
        annotations = []
        for raw_line in text.splitlines():
            line = raw_line.strip()
            if not line:
                continue
            for annot_type in self._classify_line(line):
                annotations.append(Annotation(
                    text=line,
                    annotation_type=annot_type,
                    location=(0, 0),
                    references=[],
                ))
        return annotations

    def _classify_line(self, line: str) -> List[str]:
        """Return the annotation types that apply to one stripped line.

        At most one tag type (the first matching ``ANNOTATION_PATTERNS``
        entry) is reported, followed by ``'note'`` when the line starts with
        one of ``NOTE_PREFIXES``. Both checks ignore case.
        """
        kinds: List[str] = []
        for annot_type, pattern in self.ANNOTATION_PATTERNS.items():
            if re.match(pattern, line, re.IGNORECASE):
                kinds.append(annot_type)
                break
        if line.upper().startswith(self.NOTE_PREFIXES):
            kinds.append('note')
        return kinds

    def _extract_title_block(self, text: str) -> Optional["TitleBlockData"]:
        """Extract title block information.

        A ``TitleBlockData`` is always returned; fields that were not found
        are empty strings.
        """
        values = self._scan_title_block(text)
        return TitleBlockData(
            project_name=values.get('project_name', ''),
            project_number=values.get('project_number', ''),
            sheet_number=values.get('sheet_number', ''),
            sheet_title=values.get('sheet_title', ''),
            discipline=values.get('discipline', ''),
            scale=values.get('scale', ''),
            date=values.get('date', ''),
            revision=values.get('revision', ''),
            drawn_by=values.get('drawn_by', ''),
            checked_by=values.get('checked_by', ''),
            approved_by=values.get('approved_by', ''),
        )

    def _scan_title_block(self, text: str) -> Dict[str, str]:
        """Return title block field values found in *text* as a dict.

        Holds one entry per ``TITLE_BLOCK_PATTERNS`` key (empty string when
        not found) plus ``'discipline'``, derived from the first letter of
        the sheet number.
        """
        values: Dict[str, str] = {}
        for name, pattern in self.TITLE_BLOCK_PATTERNS.items():
            values[name] = ''
            for match in re.finditer(pattern, text, re.IGNORECASE):
                if name == 'project_number':
                    # "SHEET NO: A101" also satisfies the number pattern;
                    # skip labels that belong to the sheet, not the project.
                    preceding = text[max(0, match.start() - 16):match.start()]
                    if preceding.rstrip().upper().endswith(('SHEET', 'DWG', 'DRAWING')):
                        continue
                values[name] = match.group(1).strip()
                break

        # Determine discipline from sheet number
        prefix = values.get('sheet_number', '')[:1].upper()
        values['discipline'] = self.DISCIPLINE_BY_PREFIX.get(prefix, '')
        return values

    def _parse_schedule_table(self, table: List[List]) -> List["Symbol"]:
        """Parse schedule table to extract symbols/elements.

        Args:
            table: Rows of cells; the first row is treated as the header.

        Returns:
            One ``Symbol`` per data row that has a non-empty tag cell.
        """
        return [
            Symbol(
                symbol_type=symbol_type,
                tag=tag,
                location=(0, 0),
                properties=props,
            )
            for tag, symbol_type, props in self._scan_schedule_rows(table)
        ]

    def _scan_schedule_rows(self, table: List[List]) -> List[Tuple[str, str, Dict[str, str]]]:
        """Return ``(tag, symbol_type, properties)`` for each usable row.

        The tag column is the first header matching ``TAG_HEADER_PATTERN``
        (column 0 when none does); the type column is the first header
        containing "type" or "size". Empty cells are left out of
        ``properties``, and a missing or empty type becomes ``'unknown'``.
        """
        if not table or len(table) < 2:
            return []

        # First row is usually headers
        headers = [str(cell).lower() if cell else '' for cell in table[0]]
        tag_col = next(
            (i for i, h in enumerate(headers) if re.search(self.TAG_HEADER_PATTERN, h)),
            0,
        )
        type_col = next(
            (i for i, h in enumerate(headers) if 'type' in h or 'size' in h),
            -1,
        )

        rows: List[Tuple[str, str, Dict[str, str]]] = []
        for row in table[1:]:
            if not row or len(row) <= tag_col or not row[tag_col]:
                continue
            tag = str(row[tag_col]).strip()
            if not tag:
                continue
            type_cell = row[type_col] if 0 <= type_col < len(row) else None
            # Guard against None so an empty cell is not stored as "None".
            symbol_type = str(type_cell).strip() if type_cell else ''
            props = {header: str(cell) for header, cell in zip(headers, row) if cell}
            rows.append((tag, symbol_type or 'unknown', props))
        return rows

    def _determine_scale(self, text: str) -> float:
        """Determine drawing scale factor.

        ``SCALE_PATTERNS`` are tried in order and the first usable match
        wins.

        Returns:
            Drawn size divided by actual size (``1:100`` gives 0.01),
            ``0.0`` for a not-to-scale marker, and ``1.0`` when no scale is
            found.
        """
        for pattern in self.SCALE_PATTERNS:
            for match in re.finditer(pattern, text, re.IGNORECASE):
                matched = match.group(0)
                if match.re.groups == 0:
                    return 0.0  # Not to scale
                if '=' in matched:
                    # Imperial: 1/4" = 1'-0"
                    return self._parse_imperial_scale(matched)
                if match.re.groups < 2:
                    continue
                # Metric: 1:100
                drawn = float(match.group(1))
                actual = float(match.group(2))
                # Skip look-alikes such as the time "12:00" rather than
                # dividing by zero.
                if drawn > 0 and actual > 0:
                    return drawn / actual
        return 1.0  # Default

    def _parse_imperial_scale(self, scale_text: str) -> float:
        """Parse imperial scale to factor.

        ``1/4" = 1'`` is read as a quarter inch on paper per foot in
        reality, giving ``0.25 / 12``. Returns ``1.0`` when the text cannot
        be parsed or contains a zero denominator or zero feet.
        """
        match = re.search(r'(\d+)(?:/(\d+))?\s*["\']?\s*=\s*(\d+)', scale_text)
        if not match:
            return 1.0
        numerator = float(match.group(1))
        denominator = float(match.group(2)) if match.group(2) else 1.0
        feet = float(match.group(3))
        if denominator == 0 or feet == 0:
            return 1.0
        paper_inches = numerator / denominator
        return paper_inches / (feet * 12)

    def _check_drawing_quality(self, title_block: "TitleBlockData",
                               dimensions: List, annotations: List) -> List[str]:
        """Check drawing for quality issues.

        Returns:
            Messages for missing title block fields (only when a title block
            is given), absent dimensions and absent general notes.
        """
        issues = []
        if title_block:
            if not title_block.project_number:
                issues.append("Missing project number in title block")
            if not title_block.sheet_number:
                issues.append("Missing sheet number")
            if not title_block.scale:
                issues.append("Missing scale indication")
            if not title_block.date:
                issues.append("Missing date")

        if not dimensions:
            issues.append("No dimensions found - verify drawing content")

        # Check for typical construction notes
        if not any(a.annotation_type == 'note' for a in annotations):
            issues.append("No general notes found")
        return issues

    def generate_drawing_index(self, results: List["DrawingAnalysisResult"]) -> str:
        """Generate drawing index from multiple analyzed drawings.

        Returns a Markdown table with one row per result that has a title
        block, ordered by sheet number (plain string comparison).
        """
        lines = ["# Drawing Index", ""]
        lines.append("| Sheet | Title | Discipline | Scale | Rev |")
        lines.append("|-------|-------|------------|-------|-----|")

        title_blocks = [r.title_block for r in results if r.title_block]
        for tb in sorted(title_blocks, key=lambda block: block.sheet_number):
            lines.append(
                f"| {tb.sheet_number} | {tb.sheet_title} | {tb.discipline} "
                f"| {tb.scale} | {tb.revision} |"
            )
        return "\n".join(lines)

    def generate_report(self, result: "DrawingAnalysisResult") -> str:
        """Generate analysis report for a drawing.

        Returns Markdown text with the file name, title block fields (when
        present), content counts, quality issues and up to the first 20
        symbols.
        """
        lines = ["# Drawing Analysis Report", "", f"**File:** {result.file_name}"]

        tb = result.title_block
        if tb:
            lines += [
                "",
                "## Title Block",
                f"- **Project:** {tb.project_name}",
                f"- **Project No:** {tb.project_number}",
                f"- **Sheet:** {tb.sheet_number}",
                f"- **Title:** {tb.sheet_title}",
                f"- **Discipline:** {tb.discipline}",
                f"- **Scale:** {tb.scale}",
                f"- **Date:** {tb.date}",
                f"- **Revision:** {tb.revision}",
            ]

        lines += [
            "",
            "## Content Summary",
            f"- **Dimensions Found:** {len(result.dimensions)}",
            f"- **Annotations Found:** {len(result.annotations)}",
            f"- **Symbols/Elements:** {len(result.symbols)}",
        ]

        if result.quality_issues:
            lines += ["", "## Quality Issues"]
            lines += [f"- ⚠️ {issue}" for issue in result.quality_issues]

        if result.symbols:
            lines += ["", "## Elements Found"]
            lines += [
                f"- {symbol.tag}: {symbol.symbol_type}"
                for symbol in result.symbols[:20]
            ]
        return "\n".join(lines)
Quick Start
# Initialize analyzer
analyzer = DrawingAnalyzer()

# Analyze a drawing
result = analyzer.analyze_pdf_drawing("A101_Floor_Plan.pdf")

# Check title block
if result.title_block:
    print(f"Sheet: {result.title_block.sheet_number}")
    print(f"Title: {result.title_block.sheet_title}")
    print(f"Scale: {result.title_block.scale}")

# Review extracted data
print(f"Dimensions: {len(result.dimensions)}")
print(f"Annotations: {len(result.annotations)}")
print(f"Symbols: {len(result.symbols)}")

# Check quality
for issue in result.quality_issues:
    print(f"Issue: {issue}")

# Generate report
report = analyzer.generate_report(result)
print(report)
Dependencies
pip install pdfplumber