CWICR Multilingual Support

Overview

The CWICR database supports 9 languages with consistent work item codes. This skill enables cross-language work item matching, translation, and regional price comparison.

Supported Languages

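| Code | Language | Region | Currency |
|------|----------|--------|----------|
| AR | Arabic | Dubai | AED |
| DE | German | Berlin | EUR |
| EN | English | Toronto | CAD |
| ES | Spanish | Barcelona | EUR |
| FR | French | Paris | EUR |
| HI | Hindi | Mumbai | INR |
| PT | Portuguese | São Paulo | BRL |
| RU | Russian | St. Petersburg | RUB |
| ZH | Chinese | Shanghai | CNY |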

Python Implementation

from dataclasses import dataclass
from enum import Enum
from typing import Any, Dict, List, Optional, Tuple

import pandas as pd

class CWICRLanguage(Enum):
    """Supported CWICR languages.

    Each member's value is a 4-tuple laid out as
    ``(language code, display name, currency code, region)``; the
    properties below expose those fields by position.
    """

    ARABIC = ("ar", "Arabic", "AED", "Dubai")
    GERMAN = ("de", "German", "EUR", "Berlin")
    ENGLISH = ("en", "English", "CAD", "Toronto")
    SPANISH = ("es", "Spanish", "EUR", "Barcelona")
    FRENCH = ("fr", "French", "EUR", "Paris")
    HINDI = ("hi", "Hindi", "INR", "Mumbai")
    PORTUGUESE = ("pt", "Portuguese", "BRL", "São Paulo")
    RUSSIAN = ("ru", "Russian", "RUB", "St. Petersburg")
    CHINESE = ("zh", "Chinese", "CNY", "Shanghai")

    @property
    def code(self) -> str:
        """Return the lowercase language code (e.g. ``"de"``)."""
        return self.value[0]

    @property
    def name(self) -> str:
        """Return the display name of the language (e.g. ``"German"``).

        NOTE: this deliberately shadows ``Enum.name``, so ``member.name``
        yields the display name rather than the member identifier
        (``"GERMAN"``). The identifier is still available as
        ``member._name_``.
        """
        return self.value[1]

    @property
    def currency(self) -> str:
        """Return the currency code associated with the language's region."""
        return self.value[2]

    @property
    def region(self) -> str:
        """Return the region (city) associated with the language."""
        return self.value[3]

@dataclass
class MultilingualWorkItem:
    """Work item with translations.

    Holds the per-language descriptions and unit prices collected for a
    single work item code.
    """

    work_item_code: str
    translations: Dict[str, str]  # language_code -> description
    prices: Dict[str, float]  # language_code -> unit_price
    unit: str

class CWICRMultilingual:
    """Work with CWICR across languages.

    Keeps one DataFrame per language code in ``self.databases`` and, for
    every DataFrame that has a ``work_item_code`` column, a copy indexed
    by that column in ``self.indexes``. Lookups by work item code go
    through the indexes; text search goes through the raw databases.
    """

    # Exchange rates to USD (approximate): local price * rate = USD price.
    EXCHANGE_RATES = {
        'AED': 0.27,
        'EUR': 1.08,
        'CAD': 0.74,
        'INR': 0.012,
        'BRL': 0.20,
        'RUB': 0.011,
        'CNY': 0.14,
        'USD': 1.0
    }

    def __init__(self, databases: Optional[Dict[str, pd.DataFrame]] = None):
        """Initialize with language databases.

        Args:
            databases: Mapping of language code to DataFrame. ``None`` (or
                an empty mapping) starts with no databases loaded.
        """
        self.databases = databases or {}
        self._index_databases()

    @staticmethod
    def _build_index(df: pd.DataFrame) -> Optional[pd.DataFrame]:
        """Return ``df`` indexed by ``work_item_code``, or None if absent."""
        if 'work_item_code' not in df.columns:
            return None
        return df.set_index('work_item_code')

    @staticmethod
    def _find_row(index: pd.DataFrame, work_item_code: str) -> Optional[pd.Series]:
        """Return the first row stored under ``work_item_code``, or None.

        ``.loc`` yields a DataFrame when the code occurs more than once;
        in that case only the first occurrence is used so callers always
        receive a single row.
        """
        if work_item_code not in index.index:
            return None
        row = index.loc[work_item_code]
        if isinstance(row, pd.DataFrame):
            row = row.iloc[0]
        return row

    def _index_databases(self):
        """Create code-based index for each database."""
        self.indexes = {}
        for lang, df in self.databases.items():
            index = self._build_index(df)
            if index is not None:
                self.indexes[lang] = index

    def load_database(self, language: "CWICRLanguage",
                      file_path: str):
        """Load database for specific language.

        The format is chosen from the file extension (case-insensitive):
        ``.parquet``, ``.xlsx`` or ``.csv``.

        Args:
            language: Language whose ``code`` is used as the storage key.
            file_path: Path of the file to read.

        Raises:
            ValueError: If the extension is not one of the supported ones.
        """
        lowered = str(file_path).lower()
        if lowered.endswith('.parquet'):
            df = pd.read_parquet(file_path)
        elif lowered.endswith('.xlsx'):
            df = pd.read_excel(file_path)
        elif lowered.endswith('.csv'):
            df = pd.read_csv(file_path)
        else:
            raise ValueError(f"Unsupported format: {file_path}")

        self.databases[language.code] = df
        index = self._build_index(df)
        if index is None:
            # Drop any index left over from a previous load so lookups do
            # not keep answering from data that is no longer in databases.
            self.indexes.pop(language.code, None)
        else:
            self.indexes[language.code] = index

    def get_item_translations(self, work_item_code: str) -> "MultilingualWorkItem":
        """Get all translations for a work item.

        Args:
            work_item_code: Code to look up in every indexed language.

        Returns:
            A MultilingualWorkItem whose ``translations`` and ``prices``
            contain one entry per language in which the code was found.
            ``unit`` is the first non-empty unit encountered, or ``""``.
        """
        translations = {}
        prices = {}
        unit = ""

        for lang, index in self.indexes.items():
            row = self._find_row(index, work_item_code)
            if row is None:
                continue
            translations[lang] = str(row.get('description', ''))
            prices[lang] = float(row.get('unit_price', 0))
            if not unit:
                unit = str(row.get('unit', ''))

        return MultilingualWorkItem(
            work_item_code=work_item_code,
            translations=translations,
            prices=prices,
            unit=unit
        )

    def translate(self, work_item_code: str,
                  from_lang: str,
                  to_lang: str) -> Optional[str]:
        """Translate work item description.

        Args:
            work_item_code: Code of the work item.
            from_lang: Source language code. Currently unused: the lookup
                is by code only, so the source language does not affect
                the result.
            to_lang: Language code whose description is returned.

        Returns:
            The description in ``to_lang``, or None when that language is
            not indexed or does not contain the code.
        """
        index = self.indexes.get(to_lang)
        if index is None:
            return None

        row = self._find_row(index, work_item_code)
        if row is None:
            return None

        return str(row.get('description', ''))

    def compare_prices(self, work_item_code: str,
                       normalize_to_usd: bool = True) -> Dict[str, float]:
        """Compare prices across regions.

        Args:
            work_item_code: Code of the work item.
            normalize_to_usd: When True, multiply each price by the
                EXCHANGE_RATES entry for the language's currency. A
                currency missing from the table is left unconverted
                (rate 1.0).

        Returns:
            Mapping of language code to price rounded to 2 decimals, for
            every indexed language that contains the code.
        """
        prices = {}

        for lang, index in self.indexes.items():
            row = self._find_row(index, work_item_code)
            if row is None:
                continue

            price = float(row.get('unit_price', 0))
            if normalize_to_usd:
                currency = self._get_currency(lang)
                price *= self.EXCHANGE_RATES.get(currency, 1.0)

            prices[lang] = round(price, 2)

        return prices

    def _get_currency(self, lang_code: str) -> str:
        """Get currency for language code; 'USD' if the code is unknown."""
        for lang in CWICRLanguage:
            if lang.code == lang_code:
                return lang.currency
        return 'USD'

    def find_cheapest_region(self, work_item_code: str) -> Tuple[str, float]:
        """Find region with lowest price (USD normalized).

        Returns:
            ``(language_code, usd_price)`` for the lowest price; the first
            element is the language code used as key in the indexes. When
            the code is not found anywhere, ``('', 0.0)``.
        """
        prices = self.compare_prices(work_item_code, normalize_to_usd=True)

        if not prices:
            return ('', 0.0)

        return min(prices.items(), key=lambda entry: entry[1])

    def find_most_expensive_region(self, work_item_code: str) -> Tuple[str, float]:
        """Find region with highest price (USD normalized).

        Returns:
            ``(language_code, usd_price)`` for the highest price; the
            first element is the language code used as key in the
            indexes. When the code is not found anywhere, ``('', 0.0)``.
        """
        prices = self.compare_prices(work_item_code, normalize_to_usd=True)

        if not prices:
            return ('', 0.0)

        return max(prices.items(), key=lambda entry: entry[1])

    def cross_language_search(self, query: str,
                              source_lang: str,
                              limit: int = 10,
                              regex: bool = False) -> Dict[str, Dict[str, str]]:
        """Search in one language, get results in all languages.

        Args:
            query: Text to look for (case-insensitive) in the source
                language's ``description`` column.
            source_lang: Language code of the database to search.
            limit: Maximum number of distinct work item codes returned.
            regex: When False (default) the query is matched literally, so
                characters such as ``(`` or ``+`` cannot raise a regex
                error. Pass True to interpret the query as a regular
                expression.

        Returns:
            Mapping of work item code to ``{language_code: description}``.
            Empty when the source language is not loaded or its table has
            no ``description`` / ``work_item_code`` column.
        """
        source_df = self.databases.get(source_lang)
        if source_df is None:
            return {}
        if 'description' not in source_df.columns or 'work_item_code' not in source_df.columns:
            return {}

        mask = source_df['description'].str.contains(
            query, case=False, na=False, regex=regex
        )
        matching_codes = source_df.loc[mask, 'work_item_code'].tolist()

        results = {}
        # dict.fromkeys removes repeated codes while keeping first-seen order.
        for code in dict.fromkeys(matching_codes):
            if len(results) >= limit:
                break
            results[code] = self.get_item_translations(code).translations

        return results

    def price_comparison_report(self, work_item_codes: List[str]) -> pd.DataFrame:
        """Generate price comparison report across regions.

        Args:
            work_item_codes: Codes to include, one output row per code.

        Returns:
            DataFrame with ``code``, ``description`` (English if present,
            otherwise the first available translation), ``unit``, one
            ``price_<lang>_usd`` column per language that has the code,
            and, when any price exists, ``min_price``, ``max_price`` and
            ``price_variance`` (computed as max minus min).
        """
        rows = []
        for code in work_item_codes:
            item = self.get_item_translations(code)
            prices_usd = self.compare_prices(code, normalize_to_usd=True)

            description = item.translations.get('en')
            if description is None:
                description = next(iter(item.translations.values()), '')

            row = {
                'code': code,
                'description': description,
                'unit': item.unit
            }

            for lang, price in prices_usd.items():
                row[f'price_{lang}_usd'] = price

            if prices_usd:
                row['min_price'] = min(prices_usd.values())
                row['max_price'] = max(prices_usd.values())
                row['price_variance'] = row['max_price'] - row['min_price']

            rows.append(row)

        return pd.DataFrame(rows)

class LanguageDetector:
    """Detect language of construction text.

    Detection is a keyword count: each language scores one point per
    keyword from its list that occurs as a substring of the text.
    """

    # Common construction terms by language
    KEYWORDS = {
        'en': ['concrete', 'wall', 'floor', 'door', 'window', 'steel', 'brick'],
        'de': ['beton', 'wand', 'boden', 'tür', 'fenster', 'stahl', 'ziegel'],
        'es': ['hormigón', 'pared', 'piso', 'puerta', 'ventana', 'acero', 'ladrillo'],
        'fr': ['béton', 'mur', 'plancher', 'porte', 'fenêtre', 'acier', 'brique'],
        'ru': ['бетон', 'стена', 'пол', 'дверь', 'окно', 'сталь', 'кирпич'],
        'zh': ['混凝土', '墙', '地板', '门', '窗', '钢', '砖'],
        'pt': ['concreto', 'parede', 'piso', 'porta', 'janela', 'aço', 'tijolo'],
        'ar': ['خرسانة', 'جدار', 'أرضية', 'باب', 'نافذة', 'فولاذ', 'طوب'],
        'hi': ['कंक्रीट', 'दीवार', 'फर्श', 'दरवाजा', 'खिड़की', 'इस्पात', 'ईंट']
    }

    @staticmethod
    def detect(text: str) -> str:
        """Detect language of text.

        Args:
            text: Text to classify. Empty or None input yields the default.

        Returns:
            The language code with the most keyword hits. On a tie the
            language listed first in KEYWORDS wins; with no hits at all
            the result is ``'en'``.
        """
        import unicodedata

        if not text:
            return 'en'  # Default to English

        # Compare in a single Unicode normal form so accented keywords
        # match whether the input uses precomposed or combining characters.
        normalized = unicodedata.normalize('NFC', text.lower())

        scores = {}
        for lang, keywords in LanguageDetector.KEYWORDS.items():
            score = sum(
                1 for kw in keywords
                if unicodedata.normalize('NFC', kw) in normalized
            )
            if score > 0:
                scores[lang] = score

        if scores:
            return max(scores.items(), key=lambda entry: entry[1])[0]

        return 'en'  # Default to English

Quick Start

Initialize multilingual support

multi = CWICRMultilingual()

Load databases

multi.load_database(CWICRLanguage.ENGLISH, "cwicr_en.parquet")
multi.load_database(CWICRLanguage.GERMAN, "cwicr_de.parquet")
multi.load_database(CWICRLanguage.SPANISH, "cwicr_es.parquet")

Get translations

item = multi.get_item_translations("CONC-001")
print(f"EN: {item.translations.get('en')}")
print(f"DE: {item.translations.get('de')}")

Price Comparison

Compare concrete prices across regions

prices = multi.compare_prices("CONC-001", normalize_to_usd=True)
print(prices)

Find cheapest region

region, price = multi.find_cheapest_region("CONC-001")
print(f"Cheapest: {region} at ${price}")

Resources

DDC Book: Chapter 2.2 - Open Data Integration
CWICR Database: 9 languages, 55,000+ items

cwicr-multilingual

Safety Notice

Copy this and send it to your AI assistant to learn

Initialize multilingual support

Load databases

Get translations

Compare concrete prices across regions

Find cheapest region

Source Transparency

Related Skills

cad-to-data

drawing-analyzer

dwg-to-excel

cost-estimation-resource