CWICR Multilingual Support
Overview
CWICR database supports 9 languages with consistent work item codes. This skill enables cross-language work item matching, translation, and regional price comparison.
Supported Languages
Code Language Region Currency
AR Arabic Dubai AED
DE German Berlin EUR
EN English Toronto CAD
ES Spanish Barcelona EUR
FR French Paris EUR
HI Hindi Mumbai INR
PT Portuguese São Paulo BRL
RU Russian St. Petersburg RUB
ZH Chinese Shanghai CNY
Python Implementation
import pandas as pd from typing import Dict, Any, List, Optional, Tuple from dataclasses import dataclass from enum import Enum
class CWICRLanguage(Enum): """Supported CWICR languages.""" ARABIC = ("ar", "Arabic", "AED", "Dubai") GERMAN = ("de", "German", "EUR", "Berlin") ENGLISH = ("en", "English", "CAD", "Toronto") SPANISH = ("es", "Spanish", "EUR", "Barcelona") FRENCH = ("fr", "French", "EUR", "Paris") HINDI = ("hi", "Hindi", "INR", "Mumbai") PORTUGUESE = ("pt", "Portuguese", "BRL", "São Paulo") RUSSIAN = ("ru", "Russian", "RUB", "St. Petersburg") CHINESE = ("zh", "Chinese", "CNY", "Shanghai")
@property
def code(self) -> str:
return self.value[0]
@property
def name(self) -> str:
return self.value[1]
@property
def currency(self) -> str:
return self.value[2]
@property
def region(self) -> str:
return self.value[3]
@dataclass class MultilingualWorkItem: """Work item with translations.""" work_item_code: str translations: Dict[str, str] # language_code -> description prices: Dict[str, float] # language_code -> unit_price unit: str
class CWICRMultilingual: """Work with CWICR across languages."""
# Exchange rates to USD (approximate)
EXCHANGE_RATES = {
'AED': 0.27,
'EUR': 1.08,
'CAD': 0.74,
'INR': 0.012,
'BRL': 0.20,
'RUB': 0.011,
'CNY': 0.14,
'USD': 1.0
}
def __init__(self, databases: Dict[str, pd.DataFrame] = None):
"""Initialize with language databases."""
self.databases = databases or {}
self._index_databases()
def _index_databases(self):
"""Create code-based index for each database."""
self.indexes = {}
for lang, df in self.databases.items():
if 'work_item_code' in df.columns:
self.indexes[lang] = df.set_index('work_item_code')
def load_database(self, language: CWICRLanguage,
file_path: str):
"""Load database for specific language."""
# Detect format and load
if file_path.endswith('.parquet'):
df = pd.read_parquet(file_path)
elif file_path.endswith('.xlsx'):
df = pd.read_excel(file_path)
elif file_path.endswith('.csv'):
df = pd.read_csv(file_path)
else:
raise ValueError(f"Unsupported format: {file_path}")
self.databases[language.code] = df
if 'work_item_code' in df.columns:
self.indexes[language.code] = df.set_index('work_item_code')
def get_item_translations(self, work_item_code: str) -> MultilingualWorkItem:
"""Get all translations for a work item."""
translations = {}
prices = {}
unit = ""
for lang, index in self.indexes.items():
if work_item_code in index.index:
row = index.loc[work_item_code]
translations[lang] = str(row.get('description', ''))
prices[lang] = float(row.get('unit_price', 0))
if not unit:
unit = str(row.get('unit', ''))
return MultilingualWorkItem(
work_item_code=work_item_code,
translations=translations,
prices=prices,
unit=unit
)
def translate(self, work_item_code: str,
from_lang: str,
to_lang: str) -> Optional[str]:
"""Translate work item description."""
if to_lang not in self.indexes:
return None
if work_item_code in self.indexes[to_lang].index:
return str(self.indexes[to_lang].loc[work_item_code].get('description', ''))
return None
def compare_prices(self, work_item_code: str,
normalize_to_usd: bool = True) -> Dict[str, float]:
"""Compare prices across regions."""
prices = {}
for lang, index in self.indexes.items():
if work_item_code in index.index:
price = float(index.loc[work_item_code].get('unit_price', 0))
if normalize_to_usd:
# Get currency for this language
currency = self._get_currency(lang)
rate = self.EXCHANGE_RATES.get(currency, 1.0)
price = price * rate
prices[lang] = round(price, 2)
return prices
def _get_currency(self, lang_code: str) -> str:
"""Get currency for language code."""
for lang in CWICRLanguage:
if lang.code == lang_code:
return lang.currency
return 'USD'
def find_cheapest_region(self, work_item_code: str) -> Tuple[str, float]:
"""Find region with lowest price (USD normalized)."""
prices = self.compare_prices(work_item_code, normalize_to_usd=True)
if not prices:
return ('', 0)
cheapest = min(prices.items(), key=lambda x: x[1])
return cheapest
def find_most_expensive_region(self, work_item_code: str) -> Tuple[str, float]:
"""Find region with highest price (USD normalized)."""
prices = self.compare_prices(work_item_code, normalize_to_usd=True)
if not prices:
return ('', 0)
expensive = max(prices.items(), key=lambda x: x[1])
return expensive
def cross_language_search(self, query: str,
source_lang: str) -> Dict[str, List[str]]:
"""Search in one language, get results in all languages."""
if source_lang not in self.databases:
return {}
source_df = self.databases[source_lang]
# Find matching codes
matches = source_df[
source_df['description'].str.contains(query, case=False, na=False)
]['work_item_code'].tolist()
# Get translations for matches
results = {}
for code in matches[:10]: # Limit to 10
item = self.get_item_translations(code)
results[code] = item.translations
return results
def price_comparison_report(self, work_item_codes: List[str]) -> pd.DataFrame:
"""Generate price comparison report across regions."""
rows = []
for code in work_item_codes:
item = self.get_item_translations(code)
prices_usd = self.compare_prices(code, normalize_to_usd=True)
row = {
'code': code,
'description': item.translations.get('en', list(item.translations.values())[0] if item.translations else ''),
'unit': item.unit
}
for lang, price in prices_usd.items():
row[f'price_{lang}_usd'] = price
if prices_usd:
row['min_price'] = min(prices_usd.values())
row['max_price'] = max(prices_usd.values())
row['price_variance'] = row['max_price'] - row['min_price']
rows.append(row)
return pd.DataFrame(rows)
class LanguageDetector: """Detect language of construction text."""
# Common construction terms by language
KEYWORDS = {
'en': ['concrete', 'wall', 'floor', 'door', 'window', 'steel', 'brick'],
'de': ['beton', 'wand', 'boden', 'tür', 'fenster', 'stahl', 'ziegel'],
'es': ['hormigón', 'pared', 'piso', 'puerta', 'ventana', 'acero', 'ladrillo'],
'fr': ['béton', 'mur', 'plancher', 'porte', 'fenêtre', 'acier', 'brique'],
'ru': ['бетон', 'стена', 'пол', 'дверь', 'окно', 'сталь', 'кирпич'],
'zh': ['混凝土', '墙', '地板', '门', '窗', '钢', '砖'],
'pt': ['concreto', 'parede', 'piso', 'porta', 'janela', 'aço', 'tijolo'],
'ar': ['خرسانة', 'جدار', 'أرضية', 'باب', 'نافذة', 'فولاذ', 'طوب'],
'hi': ['कंक्रीट', 'दीवार', 'फर्श', 'दरवाजा', 'खिड़की', 'इस्पात', 'ईंट']
}
@staticmethod
def detect(text: str) -> str:
"""Detect language of text."""
text_lower = text.lower()
scores = {}
for lang, keywords in LanguageDetector.KEYWORDS.items():
score = sum(1 for kw in keywords if kw in text_lower)
if score > 0:
scores[lang] = score
if scores:
return max(scores.items(), key=lambda x: x[1])[0]
return 'en' # Default to English
Quick Start
Initialize multilingual support
multi = CWICRMultilingual()
Load databases
multi.load_database(CWICRLanguage.ENGLISH, "cwicr_en.parquet") multi.load_database(CWICRLanguage.GERMAN, "cwicr_de.parquet") multi.load_database(CWICRLanguage.SPANISH, "cwicr_es.parquet")
Get translations
item = multi.get_item_translations("CONC-001") print(f"EN: {item.translations.get('en')}") print(f"DE: {item.translations.get('de')}")
Price Comparison
Compare concrete prices across regions
prices = multi.compare_prices("CONC-001", normalize_to_usd=True) print(prices)
Find cheapest region
region, price = multi.find_cheapest_region("CONC-001") print(f"Cheapest: {region} at ${price}")
Resources
-
DDC Book: Chapter 2.2 - Open Data Integration
-
CWICR Database: 9 languages, 55,000+ items