# Import reader from csv
from csv import reader

# Read in the App Store and Google Play datasets.
# Each file is opened in a `with` block so its handle is closed as soon as the
# rows have been copied into a list. The encoding is given explicitly because
# the app names contain non-ASCII characters, and `newline=''` is what the csv
# module asks for so that quoted fields with embedded newlines parse correctly.
with open('data/AppleStore.csv', encoding='utf8', newline='') as open_app_store:
    read_app_store = reader(open_app_store)
    # Save app_store dataset as a list of lists (header row included)
    apple_app_data = list(read_app_store)

with open('data/googleplaystore.csv', encoding='utf8', newline='') as open_google_play:
    read_google_play = reader(open_google_play)
    # Save google_play dataset as a list of lists (header row included)
    google_play_data = list(read_google_play)

# Explore each dataset
def explore_data(dataset, start, end, rows_and_columns=False):
    """Print a slice of a dataset and, optionally, its dimensions.

    Args:
        dataset: A list of rows, each row itself a list.
        start: Index of the first row to print (slice start).
        end: Index one past the last row to print (slice end).
        rows_and_columns: When true, also print the total number of rows in
            ``dataset`` and the number of columns, measured on the first row.
            An empty dataset is reported as having 0 columns.

    Returns:
        None. Everything is written to standard output.
    """
    for row in dataset[start:end]:
        print(row)
        # print() adds its own newline after the '\n', so two blank lines
        # separate consecutive rows.
        print('\n')

    if rows_and_columns:
        print('Number of rows:', len(dataset))
        # There is no first row to measure when the dataset is empty.
        print('Number of columns:', len(dataset[0]) if len(dataset) else 0)

# Preview the Google Play data: a title, a 35-dash rule and a blank gap,
# followed by the header row plus the first four apps (rows 0-4).
print("The first five rows in the Google Play Store dataset:", '-' * 35, '\n', sep='\n')
explore_data(google_play_data, 0, 5)

# Check for error in row 10473 as noted in Kaggle discussion
BAD_ROW_INDEX = 10473

print('-' * 35)  # print a line to separate the output
print('\n')
print('The current row data for index position 10473:')
print('-' * 35)
print(google_play_data[BAD_ROW_INDEX])
print('\n')

# Row 10473 is missing its 'Category' value, which leaves it one field shorter
# than the header row. Delete it only while that mismatch is still present, so
# re-running this cell cannot remove the valid row that moves into this
# position after the first deletion.
if len(google_play_data[BAD_ROW_INDEX]) != len(google_play_data[0]):
    del google_play_data[BAD_ROW_INDEX]

# Check if the row was removed:
print('The new row at index position 10473 is:')
print('-' * 35)
print(google_play_data[BAD_ROW_INDEX])
print('\n')

# Check for duplicate apps
print('Duplicate Android Apps:')
print('-' * 35)
duplicate_android_apps = []  # one entry per repeated occurrence of a name
unique_android_apps = []     # each distinct name, in first-seen order
seen_android_names = set()   # mirrors unique_android_apps for O(1) lookups

for app in google_play_data[1:]:  # skip the header row; it is not an app
    name = app[0]
    if name in seen_android_names:
        duplicate_android_apps.append(name)
    else:
        seen_android_names.add(name)
        unique_android_apps.append(name)

# Print the number of duplicate apps in the google play dataset
print(f'There are {len(duplicate_android_apps)} duplicate android apps.')
if duplicate_android_apps:
    print('\n')
    print('Example of duplicate android apps:', duplicate_android_apps[:15])

The first five rows in the Google Play Store dataset:
-----------------------------------


['App', 'Category', 'Rating', 'Reviews', 'Size', 'Installs', 'Type', 'Price', 'Content Rating', 'Genres', 'Last Updated', 'Current Ver', 'Android Ver']


['Photo Editor & Candy Camera & Grid & ScrapBook', 'ART_AND_DESIGN', '4.1', '159', '19M', '10,000+', 'Free', '0', 'Everyone', 'Art & Design', 'January 7, 2018', '1.0.0', '4.0.3 and up']


['Coloring book moana', 'ART_AND_DESIGN', '3.9', '967', '14M', '500,000+', 'Free', '0', 'Everyone', 'Art & Design;Pretend Play', 'January 15, 2018', '2.0.0', '4.0.3 and up']


['U Launcher Lite – FREE Live Cool Themes, Hide Apps', 'ART_AND_DESIGN', '4.7', '87510', '8.7M', '5,000,000+', 'Free', '0', 'Everyone', 'Art & Design', 'August 1, 2018', '1.2.4', '4.0.3 and up']


['Sketch - Draw & Paint', 'ART_AND_DESIGN', '4.5', '215644', '25M', '50,000,000+', 'Free', '0', 'Teen', 'Art & Design', 'June 8, 2018', 'Varies with device', '4.2 and up']


-----------------------------------


The current row data for index position 10473:
-----------------------------------
['Life Made WI-Fi Touchscreen Photo Frame', '1.9', '19', '3.0M', '1,000+', 'Free', '0', 'Everyone', '', 'February 11, 2018', '1.0.19', '4.0 and up']


The new row at index position 10473 is:
-----------------------------------
['osmino Wi-Fi: free WiFi', 'TOOLS', '4.2', '134203', '4.1M', '10,000,000+', 'Free', '0', 'Everyone', 'Tools', 'August 7, 2018', '6.06.14', '4.4 and up']


Duplicate Android Apps:
-----------------------------------
There are 1181 duplicate android apps.


Example of duplicate android apps: ['Quick PDF Scanner + OCR FREE', 'Box', 'Google My Business', 'ZOOM Cloud Meetings', 'join.me - Simple Meetings', 'Box', 'Zenefits', 'Google Ads', 'Google My Business', 'Slack', 'FreshBooks Classic', 'Insightly CRM', 'QuickBooks Accounting: Invoicing & Expenses', 'HipChat - Chat Built for Teams', 'Xero Accounting Software']

# Preview the App Store data: a title, a 35-dash rule and a blank gap,
# followed by the header row plus the first four apps (rows 0-4).
print("The first five rows in the App Store dataset:", '-' * 35, '\n', sep='\n')
explore_data(apple_app_data, 0, 5)

# Check for duplicate apps
# Apps are identified by the 'id' column (index 0); 'track_name' (index 1) is
# kept alongside it purely to make the report readable.
duplicate_apple_apps = []
seen_ids = set()

for app in apple_app_data[1:]:  # skip the header row
    app_id, name = app[0], app[1]
    if app_id in seen_ids:
        # list.append takes exactly one argument, so store the pair as a tuple.
        duplicate_apple_apps.append((app_id, name))
    else:
        seen_ids.add(app_id)

# Print the number of duplicate apps in the App Store dataset
print('Duplicate Apple Apps:')
print('-' * 35)

print(f'There are {len(duplicate_apple_apps)} duplicate apple apps.')
if duplicate_apple_apps:
    print('\n')
    print('Example of duplicate apple apps:', duplicate_apple_apps[:15])

The first five rows in the App Store dataset:
-----------------------------------


['id', 'track_name', 'size_bytes', 'currency', 'price', 'rating_count_tot', 'rating_count_ver', 'user_rating', 'user_rating_ver', 'ver', 'cont_rating', 'prime_genre', 'sup_devices.num', 'ipadSc_urls.num', 'lang.num', 'vpp_lic']


['284882215', 'Facebook', '389879808', 'USD', '0.0', '2974676', '212', '3.5', '3.5', '95.0', '4+', 'Social Networking', '37', '1', '29', '1']


['389801252', 'Instagram', '113954816', 'USD', '0.0', '2161558', '1289', '4.5', '4.0', '10.23', '12+', 'Photo & Video', '37', '0', '29', '1']


['529479190', 'Clash of Clans', '116476928', 'USD', '0.0', '2130805', '579', '4.5', '4.5', '9.24.12', '9+', 'Games', '38', '5', '18', '1']


['420009108', 'Temple Run', '65921024', 'USD', '0.0', '1724546', '3842', '4.5', '4.0', '1.6.2', '9+', 'Games', '40', '5', '1', '1']


Duplicate Apple Apps:
-----------------------------------
There are 0 duplicate apple apps.

# Removing Duplicate Entries
# Map every app name to the largest review count seen for that name.
reviews_max = {}

for row in google_play_data[1:]:  # header row is not an app
    name = row[0]
    n_reviews = float(row[3])

    highest_so_far = reviews_max.get(name)
    if highest_so_far is None or highest_so_far < n_reviews:
        reviews_max[name] = n_reviews

# Show a small sample of the mapping, then its overall size
print('The first five entries in the reviews_max dictionary:')
print('-' * 35)
for key, value in list(reviews_max.items())[:5]:
    print(f'| {key} | {value}')

print('\n')
print(f'The reviews_max dictionary has {len(reviews_max)} entries.')

# Remove the duplicate rows utilizing reviews_max dictionary
print('\n')
print('The first five rows of the android_clean dataset:')
print('-' * 35)
android_clean = []
# A set keeps the "already kept this name?" check O(1) per row; the original
# list made this loop quadratic in the number of apps.
already_added = set()

for row in google_play_data[1:]:  # skip the header row
    name = row[0]
    n_reviews = float(row[3])

    # Keep the first row whose review count equals the maximum for its name.
    # The already_added check is needed because several rows of the same app
    # can share that maximum.
    if n_reviews == reviews_max[name] and name not in already_added:
        android_clean.append(row)
        already_added.add(name)

# Explore the android_clean dataset
explore_data(android_clean, 0, 5)

print(f'The android_clean dataset has {len(android_clean)} entries.')

The first five entries in the reviews_max dictionary:
-----------------------------------
| Photo Editor & Candy Camera & Grid & ScrapBook | 159.0
| Coloring book moana | 974.0
| U Launcher Lite – FREE Live Cool Themes, Hide Apps | 87510.0
| Sketch - Draw & Paint | 215644.0
| Pixel Draw - Number Art Coloring Book | 967.0


The reviews_max dictionary has 9659 entries.


The first five rows of the android_clean dataset:
-----------------------------------
['Photo Editor & Candy Camera & Grid & ScrapBook', 'ART_AND_DESIGN', '4.1', '159', '19M', '10,000+', 'Free', '0', 'Everyone', 'Art & Design', 'January 7, 2018', '1.0.0', '4.0.3 and up']


['U Launcher Lite – FREE Live Cool Themes, Hide Apps', 'ART_AND_DESIGN', '4.7', '87510', '8.7M', '5,000,000+', 'Free', '0', 'Everyone', 'Art & Design', 'August 1, 2018', '1.2.4', '4.0.3 and up']


['Sketch - Draw & Paint', 'ART_AND_DESIGN', '4.5', '215644', '25M', '50,000,000+', 'Free', '0', 'Teen', 'Art & Design', 'June 8, 2018', 'Varies with device', '4.2 and up']


['Pixel Draw - Number Art Coloring Book', 'ART_AND_DESIGN', '4.3', '967', '2.8M', '100,000+', 'Free', '0', 'Everyone', 'Art & Design;Creativity', 'June 20, 2018', '1.1', '4.4 and up']


['Paper flowers instructions', 'ART_AND_DESIGN', '4.4', '167', '5.6M', '50,000+', 'Free', '0', 'Everyone', 'Art & Design', 'March 26, 2017', '1.0', '2.3 and up']


The android_clean dataset has 9659 entries.

# Removing Non-English Apps
def english_check(string):
    """
    Decide whether a string looks like English text.

    A character counts as non-English when its code point is above 127
    (i.e. it lies outside the ASCII range). Up to three such characters are
    tolerated, so names carrying a stray symbol or emoji still pass.

    Returns True when the string holds at most 3 non-ASCII characters and
    False when it holds 4 or more.
    """
    non_ascii_total = sum(1 for character in string if ord(character) > 127)
    return non_ascii_total <= 3

#print(english_check('Instagram'))
#print(english_check('爱奇艺PPS -《欢乐颂2》电视剧热播'))
#print(english_check('Docs To Go™ Free Office Suite'))
#print(english_check('Instachat 😜'))

# Keep only the android_clean rows whose app name (column 0) passes english_check
android_english = [app for app in android_clean if english_check(app[0])]
        
# Filter out non-English apps from apple_app_data dataset
ios_english = []
for app in apple_app_data[1:]:  # skip the header row
    # The app name is 'track_name' at index 1. Index 0 is the numeric 'id',
    # which is pure ASCII and therefore always passes english_check, so
    # testing it filtered nothing out.
    name = app[1]

    if english_check(name):
        ios_english.append(app)

# Explore the filtered datasets
print("Android English Apps:")
explore_data(android_english, 0, 3, True)
print("\nIos English Apps:")
explore_data(ios_english, 0, 3, True)

Android English Apps:
['Photo Editor & Candy Camera & Grid & ScrapBook', 'ART_AND_DESIGN', '4.1', '159', '19M', '10,000+', 'Free', '0', 'Everyone', 'Art & Design', 'January 7, 2018', '1.0.0', '4.0.3 and up']


['U Launcher Lite – FREE Live Cool Themes, Hide Apps', 'ART_AND_DESIGN', '4.7', '87510', '8.7M', '5,000,000+', 'Free', '0', 'Everyone', 'Art & Design', 'August 1, 2018', '1.2.4', '4.0.3 and up']


['Sketch - Draw & Paint', 'ART_AND_DESIGN', '4.5', '215644', '25M', '50,000,000+', 'Free', '0', 'Teen', 'Art & Design', 'June 8, 2018', 'Varies with device', '4.2 and up']


Number of rows: 9614
Number of columns: 13

Ios English Apps:
['284882215', 'Facebook', '389879808', 'USD', '0.0', '2974676', '212', '3.5', '3.5', '95.0', '4+', 'Social Networking', '37', '1', '29', '1']


['389801252', 'Instagram', '113954816', 'USD', '0.0', '2161558', '1289', '4.5', '4.0', '10.23', '12+', 'Photo & Video', '37', '0', '29', '1']


['529479190', 'Clash of Clans', '116476928', 'USD', '0.0', '2130805', '579', '4.5', '4.5', '9.24.12', '9+', 'Games', '38', '5', '18', '1']


Number of rows: 7197
Number of columns: 16

# Isolating Free Android Apps
# The price sits six fields from the end of each row; an app is free when that
# field is the text '0' or '0.0'.
free_android_apps = [app for app in android_english if app[-6] in ('0', '0.0')]
        
# Isolating Free iOS Apps
# The price is read from index 4; an app is free when that field is the text
# '0' or '0.0'.
free_ios_apps = [app for app in ios_english if app[4] in ('0', '0.0')]

# Show the first three rows and the dimensions of each free-app dataset
print("Android Free Apps:")
explore_data(free_android_apps, 0, 3, True)
print("\nIos Free Apps:")
explore_data(free_ios_apps, 0, 3, True)

Android Free Apps:
['Photo Editor & Candy Camera & Grid & ScrapBook', 'ART_AND_DESIGN', '4.1', '159', '19M', '10,000+', 'Free', '0', 'Everyone', 'Art & Design', 'January 7, 2018', '1.0.0', '4.0.3 and up']


['U Launcher Lite – FREE Live Cool Themes, Hide Apps', 'ART_AND_DESIGN', '4.7', '87510', '8.7M', '5,000,000+', 'Free', '0', 'Everyone', 'Art & Design', 'August 1, 2018', '1.2.4', '4.0.3 and up']


['Sketch - Draw & Paint', 'ART_AND_DESIGN', '4.5', '215644', '25M', '50,000,000+', 'Free', '0', 'Teen', 'Art & Design', 'June 8, 2018', 'Varies with device', '4.2 and up']


Number of rows: 8864
Number of columns: 13

Ios Free Apps:
['284882215', 'Facebook', '389879808', 'USD', '0.0', '2974676', '212', '3.5', '3.5', '95.0', '4+', 'Social Networking', '37', '1', '29', '1']


['389801252', 'Instagram', '113954816', 'USD', '0.0', '2161558', '1289', '4.5', '4.0', '10.23', '12+', 'Photo & Video', '37', '0', '29', '1']


['529479190', 'Clash of Clans', '116476928', 'USD', '0.0', '2130805', '579', '4.5', '4.5', '9.24.12', '9+', 'Games', '38', '5', '18', '1']


Number of rows: 4056
Number of columns: 16

# Most common apps by genre
# Create a frequency table function


def freq_table(dataset, index):
    """Build a percentage frequency table for one column of a dataset.

    Args:
        dataset: A list of rows (each row indexable by ``index``).
        index: Position of the column whose values are tallied.

    Returns:
        A dict mapping each distinct value found in that column to the
        percentage of rows carrying it, in first-seen order. An empty
        dataset yields an empty dict.
    """
    n_rows = len(dataset)

    counts = {}
    for row in dataset:
        value = row[index]
        counts[value] = counts.get(value, 0) + 1

    percentages = {}
    for value, count in counts.items():
        percentages[value] = (count / n_rows) * 100
    return percentages

# Column names of the Google Play dataset, in column order.
android_header = [
    'App',
    'Category',
    'Rating',
    'Reviews',
    'Size',
    'Installs',
    'Type',
    'Price',
    'Content Rating',
    'Genres',
    'Last Updated',
    'Current Ver',
    'Android Ver',
]

# Column names of the App Store dataset, in column order.
ios_header = [
    'id',
    'track_name',
    'size_bytes',
    'currency',
    'price',
    'rating_count_tot',
    'rating_count_ver',
    'user_rating',
    'user_rating_ver',
    'ver',
    'cont_rating',
    'prime_genre',
    'sup_devices.num',
    'ipadSc_urls.num',
    'lang.num',
    'vpp_lic',
]

def display_table(dataset, index):
    """Print the frequency table of one column, most frequent value first.

    The percentages come from ``freq_table(dataset, index)``. Entries are
    ordered by descending percentage; values with equal percentages are
    ordered by descending value. Each line has the form ``value : percentage``.
    """
    percentages = freq_table(dataset, index)
    # Sorting (percentage, value) pairs ranks by percentage first and uses
    # the value itself only to break ties.
    ranked = sorted(
        ((share, label) for label, share in percentages.items()),
        reverse=True,
    )
    for share, label in ranked:
        print(f"{label} : {share}")

# Print one frequency table per (title, dataset, column) entry:
# Android 'Genres' (9), Android 'Category' (1) and iOS 'prime_genre' (-5).
for title, apps, column in (
    ('Android Genre Frequency Table:', free_android_apps, 9),
    ("\nAndroid Category Frequency Table:", free_android_apps, 1),
    ("\niOS Prime Genre Frequency Table:", free_ios_apps, -5),
):
    print(title)
    display_table(apps, column)

Android Genre Frequency Table:
Tools : 8.449909747292418
Entertainment : 6.069494584837545
Education : 5.347472924187725
Business : 4.591606498194946
Productivity : 3.892148014440433
Lifestyle : 3.892148014440433
Finance : 3.7003610108303246
Medical : 3.531137184115524
Sports : 3.463447653429603
Personalization : 3.3167870036101084
Communication : 3.2378158844765346
Action : 3.1024368231046933
Health & Fitness : 3.0798736462093865
Photography : 2.944494584837545
News & Magazines : 2.7978339350180503
Social : 2.6624548736462095
Travel & Local : 2.3240072202166067
Shopping : 2.2450361010830324
Books & Reference : 2.1435018050541514
Simulation : 2.0419675090252705
Dating : 1.861462093862816
Arcade : 1.8501805054151623
Video Players & Editors : 1.7712093862815883
Casual : 1.7599277978339352
Maps & Navigation : 1.3989169675090252
Food & Drink : 1.2409747292418771
Puzzle : 1.128158844765343
Racing : 0.9927797833935018
Role Playing : 0.9363718411552346
Libraries & Demo : 0.9363718411552346
Auto & Vehicles : 0.9250902527075812
Strategy : 0.9138086642599278
House & Home : 0.8235559566787004
Weather : 0.8009927797833934
Events : 0.7107400722021661
Adventure : 0.6768953068592057
Comics : 0.6092057761732852
Beauty : 0.5979241877256317
Art & Design : 0.5979241877256317
Parenting : 0.4963898916967509
Card : 0.45126353790613716
Casino : 0.42870036101083037
Trivia : 0.41741877256317694
Educational;Education : 0.39485559566787
Board : 0.3835740072202166
Educational : 0.3722924187725632
Education;Education : 0.33844765342960287
Word : 0.2594765342960289
Casual;Pretend Play : 0.236913357400722
Music : 0.2030685920577617
Racing;Action & Adventure : 0.16922382671480143
Puzzle;Brain Games : 0.16922382671480143
Entertainment;Music & Video : 0.16922382671480143
Casual;Brain Games : 0.13537906137184114
Casual;Action & Adventure : 0.13537906137184114
Arcade;Action & Adventure : 0.12409747292418773
Action;Action & Adventure : 0.10153429602888085
Educational;Pretend Play : 0.09025270758122744
Simulation;Action & Adventure : 0.078971119133574
Parenting;Education : 0.078971119133574
Entertainment;Brain Games : 0.078971119133574
Board;Brain Games : 0.078971119133574
Parenting;Music & Video : 0.06768953068592057
Educational;Brain Games : 0.06768953068592057
Casual;Creativity : 0.06768953068592057
Art & Design;Creativity : 0.06768953068592057
Education;Pretend Play : 0.056407942238267145
Role Playing;Pretend Play : 0.04512635379061372
Education;Creativity : 0.04512635379061372
Role Playing;Action & Adventure : 0.033844765342960284
Puzzle;Action & Adventure : 0.033844765342960284
Entertainment;Creativity : 0.033844765342960284
Entertainment;Action & Adventure : 0.033844765342960284
Educational;Creativity : 0.033844765342960284
Educational;Action & Adventure : 0.033844765342960284
Education;Music & Video : 0.033844765342960284
Education;Brain Games : 0.033844765342960284
Education;Action & Adventure : 0.033844765342960284
Adventure;Action & Adventure : 0.033844765342960284
Video Players & Editors;Music & Video : 0.02256317689530686
Sports;Action & Adventure : 0.02256317689530686
Simulation;Pretend Play : 0.02256317689530686
Puzzle;Creativity : 0.02256317689530686
Music;Music & Video : 0.02256317689530686
Entertainment;Pretend Play : 0.02256317689530686
Casual;Education : 0.02256317689530686
Board;Action & Adventure : 0.02256317689530686
Video Players & Editors;Creativity : 0.01128158844765343
Trivia;Education : 0.01128158844765343
Travel & Local;Action & Adventure : 0.01128158844765343
Tools;Education : 0.01128158844765343
Strategy;Education : 0.01128158844765343
Strategy;Creativity : 0.01128158844765343
Strategy;Action & Adventure : 0.01128158844765343
Simulation;Education : 0.01128158844765343
Role Playing;Brain Games : 0.01128158844765343
Racing;Pretend Play : 0.01128158844765343
Puzzle;Education : 0.01128158844765343
Parenting;Brain Games : 0.01128158844765343
Music & Audio;Music & Video : 0.01128158844765343
Lifestyle;Pretend Play : 0.01128158844765343
Lifestyle;Education : 0.01128158844765343
Health & Fitness;Education : 0.01128158844765343
Health & Fitness;Action & Adventure : 0.01128158844765343
Entertainment;Education : 0.01128158844765343
Communication;Creativity : 0.01128158844765343
Comics;Creativity : 0.01128158844765343
Casual;Music & Video : 0.01128158844765343
Card;Action & Adventure : 0.01128158844765343
Books & Reference;Education : 0.01128158844765343
Art & Design;Pretend Play : 0.01128158844765343
Art & Design;Action & Adventure : 0.01128158844765343
Arcade;Pretend Play : 0.01128158844765343
Adventure;Education : 0.01128158844765343

Android Category Frequency Table:
FAMILY : 18.907942238267147
GAME : 9.724729241877256
TOOLS : 8.461191335740072
BUSINESS : 4.591606498194946
LIFESTYLE : 3.9034296028880866
PRODUCTIVITY : 3.892148014440433
FINANCE : 3.7003610108303246
MEDICAL : 3.531137184115524
SPORTS : 3.395758122743682
PERSONALIZATION : 3.3167870036101084
COMMUNICATION : 3.2378158844765346
HEALTH_AND_FITNESS : 3.0798736462093865
PHOTOGRAPHY : 2.944494584837545
NEWS_AND_MAGAZINES : 2.7978339350180503
SOCIAL : 2.6624548736462095
TRAVEL_AND_LOCAL : 2.33528880866426
SHOPPING : 2.2450361010830324
BOOKS_AND_REFERENCE : 2.1435018050541514
DATING : 1.861462093862816
VIDEO_PLAYERS : 1.7937725631768955
MAPS_AND_NAVIGATION : 1.3989169675090252
FOOD_AND_DRINK : 1.2409747292418771
EDUCATION : 1.1620036101083033
ENTERTAINMENT : 0.9589350180505415
LIBRARIES_AND_DEMO : 0.9363718411552346
AUTO_AND_VEHICLES : 0.9250902527075812
HOUSE_AND_HOME : 0.8235559566787004
WEATHER : 0.8009927797833934
EVENTS : 0.7107400722021661
PARENTING : 0.6543321299638989
ART_AND_DESIGN : 0.6430505415162455
COMICS : 0.6204873646209386
BEAUTY : 0.5979241877256317

iOS Prime Genre Frequency Table:
Games : 55.64595660749507
Entertainment : 8.234714003944774
Photo & Video : 4.117357001972387
Social Networking : 3.5256410256410255
Education : 3.2544378698224854
Shopping : 2.983234714003945
Utilities : 2.687376725838264
Lifestyle : 2.3175542406311638
Finance : 2.0710059171597637
Sports : 1.947731755424063
Health & Fitness : 1.8737672583826428
Music : 1.6518737672583828
Book : 1.6272189349112427
Productivity : 1.5285996055226825
News : 1.4299802761341223
Travel : 1.3806706114398422
Food & Drink : 1.0601577909270217
Weather : 0.7642998027613412
Reference : 0.4930966469428008
Navigation : 0.4930966469428008
Business : 0.4930966469428008
Catalogs : 0.22189349112426035
Medical : 0.19723865877712032

# Most Popular Apps by Genre in App Store

def freq_table(dataset, index):
    """Return the share of rows, in percent, held by each value of a column.

    ``dataset`` is a list of rows and ``index`` selects the column to tally.
    The result maps each distinct column value to ``count / len(dataset) * 100``
    and keeps the order in which the values were first encountered. An empty
    dataset produces an empty dict.
    """
    total = len(dataset)
    tally = {}

    for record in dataset:
        label = record[index]
        try:
            tally[label] += 1
        except KeyError:
            # First time this value shows up in the column.
            tally[label] = 1

    return {label: (hits / total) * 100 for label, hits in tally.items()}

# The frequency table is used here only for its keys: the distinct values of
# 'prime_genre' (fifth field from the end) among the free iOS apps.
genres_ios = freq_table(free_ios_apps, -5)

for genre in genres_ios:
    rating_count_sum = 0  # running sum of 'rating_count_tot' for this genre
    apps_in_genre = 0     # number of apps counted for this genre

    for app in free_ios_apps:
        if app[-5] != genre:
            continue
        rating_count_sum += float(app[5])
        apps_in_genre += 1

    if apps_in_genre == 0:  # nothing to average; avoids division by zero
        continue

    avg_user_ratings = round(rating_count_sum / apps_in_genre, 2)
    print(f"Genre: {genre}, Average User Ratings: {avg_user_ratings}")

Genre: Social Networking, Average User Ratings: 53078.2
Genre: Photo & Video, Average User Ratings: 27249.89
Genre: Games, Average User Ratings: 18924.69
Genre: Music, Average User Ratings: 56482.03
Genre: Reference, Average User Ratings: 67447.9
Genre: Health & Fitness, Average User Ratings: 19952.32
Genre: Weather, Average User Ratings: 47220.94
Genre: Utilities, Average User Ratings: 14010.1
Genre: Travel, Average User Ratings: 20216.02
Genre: Shopping, Average User Ratings: 18746.68
Genre: News, Average User Ratings: 15892.72
Genre: Navigation, Average User Ratings: 25972.05
Genre: Lifestyle, Average User Ratings: 8978.31
Genre: Entertainment, Average User Ratings: 10822.96
Genre: Food & Drink, Average User Ratings: 20179.09
Genre: Sports, Average User Ratings: 20128.97
Genre: Book, Average User Ratings: 8498.33
Genre: Finance, Average User Ratings: 13522.26
Genre: Education, Average User Ratings: 6266.33
Genre: Productivity, Average User Ratings: 19053.89
Genre: Business, Average User Ratings: 6367.8
Genre: Catalogs, Average User Ratings: 1779.56
Genre: Medical, Average User Ratings: 459.75

# Most Popular Apps by Genre in Google Play Store

# The frequency table is used here only for its keys: the distinct values of
# 'Category' (index 1) among the free Android apps.
categories_android = freq_table(free_android_apps, 1)

# 'Installs' values look like '10,000+'; drop both symbols before converting.
strip_install_symbols = str.maketrans('', '', '+,')

for category in categories_android:
    installs_sum = 0      # running sum of installs for this category
    apps_in_category = 0  # number of apps counted for this category

    for app in free_android_apps:
        if app[1] != category:
            continue
        installs_sum += float(app[5].translate(strip_install_symbols))
        apps_in_category += 1

    if apps_in_category == 0:  # nothing to average; avoids division by zero
        continue

    avg_installs = round(installs_sum / apps_in_category, 2)
    print(f"Category: {category}, Average Installs: {avg_installs}")

Category: ART_AND_DESIGN, Average Installs: 1986335.09
Category: AUTO_AND_VEHICLES, Average Installs: 647317.82
Category: BEAUTY, Average Installs: 513151.89
Category: BOOKS_AND_REFERENCE, Average Installs: 8767811.89
Category: BUSINESS, Average Installs: 1712290.15
Category: COMICS, Average Installs: 817657.27
Category: COMMUNICATION, Average Installs: 38456119.17
Category: DATING, Average Installs: 854028.83
Category: EDUCATION, Average Installs: 1833495.15
Category: ENTERTAINMENT, Average Installs: 11640705.88
Category: EVENTS, Average Installs: 253542.22
Category: FINANCE, Average Installs: 1387692.48
Category: FOOD_AND_DRINK, Average Installs: 1924897.74
Category: HEALTH_AND_FITNESS, Average Installs: 4188821.99
Category: HOUSE_AND_HOME, Average Installs: 1331540.56
Category: LIBRARIES_AND_DEMO, Average Installs: 638503.73
Category: LIFESTYLE, Average Installs: 1437816.27
Category: GAME, Average Installs: 15588015.6
Category: FAMILY, Average Installs: 3695641.82
Category: MEDICAL, Average Installs: 120550.62
Category: SOCIAL, Average Installs: 23253652.13
Category: SHOPPING, Average Installs: 7036877.31
Category: PHOTOGRAPHY, Average Installs: 17840110.4
Category: SPORTS, Average Installs: 3638640.14
Category: TRAVEL_AND_LOCAL, Average Installs: 13984077.71
Category: TOOLS, Average Installs: 10801391.3
Category: PERSONALIZATION, Average Installs: 5201482.61
Category: PRODUCTIVITY, Average Installs: 16787331.34
Category: PARENTING, Average Installs: 542603.62
Category: WEATHER, Average Installs: 5074486.2
Category: VIDEO_PLAYERS, Average Installs: 24727872.45
Category: NEWS_AND_MAGAZINES, Average Installs: 9549178.47
Category: MAPS_AND_NAVIGATION, Average Installs: 4056941.77

Column Name	Description
`id`	App ID
`track_name`	App Name
`size_bytes`	Size (in Bytes)
`currency`	Currency Type
`price`	Price amount
`rating_count_tot`	User Rating counts (for all versions)
`rating_count_ver`	User Rating counts (for current version)
`user_rating`	Average User Rating value (for all versions)
`user_rating_ver`	Average User Rating value (for current version)
`ver`	Latest version code
`cont_rating`	Content Rating
`prime_genre`	Primary Genre
`sup_devices.num`	Number of supporting devices
`ipadSc_urls.num`	Number of screenshots displayed for app preview
`lang.num`	Number of supported languages
`vpp_lic`	VPP Device-Based Licensing Enabled

Column Name	Description
`App`	App Name
`Category`	App Category
`Rating`	Average User Rating
`Reviews`	Number of User Reviews
`Size`	Size of the App (Varies by Device)
`Installs`	Number of User Installs
`Type`	App Type (Free or Paid)
`Price`	Price of the App
`Content Rating`	Age Group Targeted
`Genres`	App Genres
`Last Updated`	Date of Last Update
`Current Ver`	Current Version of the App
`Android Ver`	Minimum Android Version Required

What Makes an App Go Viral?¶

Dataset Description¶

📱 Apple App Store Dataset¶

🤖 Google Play Store Dataset¶

1) Load CSVs into Python Lists¶

2) Helper: `explore_data(dataset, start, end, rows_and_columns=False)`¶

3) Google Play: Inspect, Fix a Known Bad Row, and Check Duplicates¶

4) Apple App Store: Inspect and Check Duplicates¶

5) De-duplicating Google Play by Keeping the Most-Reviewed Entry¶

Removing Non-English App Names (ASCII Heuristic)¶

Isolating Free Apps (Android & iOS)¶

Validating Our App Ideas with a Lean, Data-Driven Strategy¶

Market Pulse: What the Frequency Tables Say¶

Apple App Store — Prime Genre (iOS)¶

Google Play — Category & Genres (Android)¶

Cross-Market Read: iOS vs. Android¶

Recommendation (Provisional) & Next Steps¶

iOS Popularity by Genre (Based on `rating_count_tot` Averages)¶

Google Play Popularity by Category (Based on Average Installs)¶

What Makes an App Go Viral?¶

Dataset Description¶

📱 Apple App Store Dataset¶

🤖 Google Play Store Dataset¶

1) Load CSVs into Python Lists¶

2) Helper: explore_data(dataset, start, end, rows_and_columns=False)¶

3) Google Play: Inspect, Fix a Known Bad Row, and Check Duplicates¶

4) Apple App Store: Inspect and Check Duplicates¶

5) De-duplicating Google Play by Keeping the Most-Reviewed Entry¶

Removing Non-English App Names (ASCII Heuristic)¶

Isolating Free Apps (Android & iOS)¶

Validating Our App Ideas with a Lean, Data-Driven Strategy¶

Market Pulse: What the Frequency Tables Say¶

Apple App Store — Prime Genre (iOS)¶

Google Play — Category & Genres (Android)¶

Cross-Market Read: iOS vs. Android¶

Recommendation (Provisional) & Next Steps¶

iOS Popularity by Genre (Based on rating_count_tot Averages)¶

Google Play Popularity by Category (Based on Average Installs)¶

2) Helper: `explore_data(dataset, start, end, rows_and_columns=False)`¶

iOS Popularity by Genre (Based on `rating_count_tot` Averages)¶