import re from collections import defaultdict from urllib2 import urlopen from BeautifulSoup import BeautifulSoup def main(): sorted_nontrivial_craftsman_ppms = list(reversed(sorted( nontrivial_craftsman_ppms(), key=lambda (state, ppm, count, pop): ppm))) print_csv_data(sorted_nontrivial_craftsman_ppms) print print_google_charts_data(sorted_nontrivial_craftsman_ppms) def print_csv_data(ppms): print 'CSV data:' print 'state,ppm,total count,total population' print '\n'.join(','.join(map(str, items)) for items in ppms) def print_google_charts_data(ppms): print 'Google charts states:' print ''.join(state for state, ppm, count, pop in ppms) ppms = [ppm for state, ppm, count, pop in ppms] max_ppm = max(ppms) normalized_ppms = [100 * float(ppm) / float(max_ppm) for ppm in ppms] print 'Google charts data:' print ','.join(map(str, normalized_ppms)) def nontrivial_craftsman_ppms(): for state, ppm, count, pop in craftsman_ppms(): if count > 5: yield state, ppm, count, pop def craftsman_ppms(): for state, count in state_counts().iteritems(): full_name = code_to_state[state] pop = state_populations[full_name.upper()] ratio = float(count) / float(pop) ppm = ratio * 10**6 yield state, ppm, count, pop def state_counts(): state_counts = defaultdict(int) for state in states(): state_counts[state.upper()] += 1 return state_counts def states(): for location in locations(): state = location.split(',')[-1] state = state.strip() is_valid_state = len(state) == 2 and state in code_to_state.keys() if is_valid_state: yield state def locations(): for line in signature_lines(): line_without_name = re.split('\(', line)[-1] location_without_closing_paren = line_without_name.replace(')', '') yield location_without_closing_paren def signature_lines(): URL = 'http://manifesto.softwarecraftsmanship.org/' content = urlopen(URL).read() tds = BeautifulSoup(content).findAll('td') return [td.string for td in tds if td.string] code_to_state = { "WA": "WASHINGTON", "VA": "VIRGINIA", "DE": "DELAWARE", "DC": "DISTRICT OF COLUMBIA", "WI": "WISCONSIN", "WV": "WEST VIRGINIA", "HI": "HAWAII", "AE": "Armed Forces Middle East", "FL": "FLORIDA", "FM": "FEDERATED STATES OF MICRONESIA", "WY": "WYOMING", "NH": "NEW HAMPSHIRE", "NJ": "NEW JERSEY", "NM": "NEW MEXICO", "TX": "TEXAS", "LA": "LOUISIANA", "NC": "NORTH CAROLINA", "ND": "NORTH DAKOTA", "NE": "NEBRASKA", "TN": "TENNESSEE", "NY": "NEW YORK", "PA": "PENNSYLVANIA", "CA": "CALIFORNIA", "NV": "NEVADA", "AA": "Armed Forces Americas", "PW": "PALAU", "GU": "GUAM", "CO": "COLORADO", "VI": "VIRGIN ISLANDS", "AK": "ALASKA", "AL": "ALABAMA", "AP": "Armed Forces Pacific", "AS": "AMERICAN SAMOA", "AR": "ARKANSAS", "VT": "VERMONT", "IL": "ILLINOIS", "GA": "GEORGIA", "IN": "INDIANA", "IA": "IOWA", "OK": "OKLAHOMA", "AZ": "ARIZONA", "ID": "IDAHO", "CT": "CONNECTICUT", "ME": "MAINE", "MD": "MARYLAND", "MA": "MASSACHUSETTS", "OH": "OHIO", "UT": "UTAH", "MO": "MISSOURI", "MN": "MINNESOTA", "MI": "MICHIGAN", "MH": "MARSHALL ISLANDS", "RI": "RHODE ISLAND", "KS": "KANSAS", "MT": "MONTANA", "MP": "NORTHERN MARIANA ISLANDS", "MS": "MISSISSIPPI", "PR": "PUERTO RICO", "SC": "SOUTH CAROLINA", "KY": "KENTUCKY", "OR": "OREGON", "SD": "SOUTH DAKOTA", } state_populations = { "CALIFORNIA": 36756666, "TEXAS": 24326974, "NEW YORK": 19490297, "FLORIDA": 18328340, "ILLINOIS": 12901563, "PENNSYLVANIA": 12448279, "OHIO": 11485910, "MICHIGAN": 10003422, "GEORGIA": 9685744, "NORTH CAROLINA": 9222414, "NEW JERSEY": 8682661, "VIRGINIA": 7769089, "WASHINGTON": 6549224, "ARIZONA": 6500180, "MASSACHUSETTS": 6497967, "INDIANA": 6376792, "TENNESSEE": 6214888, "MISSOURI": 5911605, "MARYLAND": 5633597, "WISCONSIN": 5627967, "MINNESOTA": 5220393, "COLORADO": 4939456, "ALABAMA": 4661900, "SOUTH CAROLINA": 4479800, "LOUISIANA": 4410796, "KENTUCKY": 4269245, "PUERTO RICO": 3954037, "OREGON": 3790060, "OKLAHOMA": 3642361, "CONNECTICUT": 3501252, "IOWA": 3002555, "MISSISSIPPI": 2938618, "ARKANSAS": 2855390, "KANSAS": 2802134, "UTAH": 2736424, "NEVADA": 2600167, "NEW MEXICO": 1984356, "WEST VIRGINIA": 1814468, "NEBRASKA": 1783432, "IDAHO": 1523816, "MAINE": 1316456, "NEW HAMPSHIRE": 1315809, "HAWAII": 1288198, "RHODE ISLAND": 1050788, "MONTANA": 967440, "DELAWARE": 873092, "SOUTH DAKOTA": 804194, "ALASKA": 686293, "NORTH DAKOTA": 641481, "VERMONT": 621270, "DISTRICT OF COLUMBIA": 591833, "WYOMING": 532668, "GUAM": 173456, "US VIRGIN ISLANDS": 108448, "NORTHERN MARIANA ISLANDS": 84546, "AMERICAN SAMOA": 57291, } if __name__ == '__main__': main()