continue homework
This commit is contained in:
		
							
								
								
									
										8
									
								
								boa/analysis/.idea/.gitignore
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										8
									
								
								boa/analysis/.idea/.gitignore
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							@@ -0,0 +1,8 @@
 | 
				
			|||||||
 | 
					# Default ignored files
 | 
				
			||||||
 | 
					/shelf/
 | 
				
			||||||
 | 
					/workspace.xml
 | 
				
			||||||
 | 
					# Editor-based HTTP Client requests
 | 
				
			||||||
 | 
					/httpRequests/
 | 
				
			||||||
 | 
					# Datasource local storage ignored files
 | 
				
			||||||
 | 
					/dataSources/
 | 
				
			||||||
 | 
					/dataSources.local.xml
 | 
				
			||||||
							
								
								
									
										10
									
								
								boa/analysis/.idea/analysis.iml
									
									
									
										generated
									
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								boa/analysis/.idea/analysis.iml
									
									
									
										generated
									
									
									
										Normal file
									
								
							@@ -0,0 +1,10 @@
 | 
				
			|||||||
 | 
					<?xml version="1.0" encoding="UTF-8"?>
 | 
				
			||||||
 | 
					<module type="PYTHON_MODULE" version="4">
 | 
				
			||||||
 | 
					  <component name="NewModuleRootManager">
 | 
				
			||||||
 | 
					    <content url="file://$MODULE_DIR$">
 | 
				
			||||||
 | 
					      <excludeFolder url="file://$MODULE_DIR$/.venv" />
 | 
				
			||||||
 | 
					    </content>
 | 
				
			||||||
 | 
					    <orderEntry type="jdk" jdkName="Python 3.13 (analysis)" jdkType="Python SDK" />
 | 
				
			||||||
 | 
					    <orderEntry type="sourceFolder" forTests="false" />
 | 
				
			||||||
 | 
					  </component>
 | 
				
			||||||
 | 
					</module>
 | 
				
			||||||
							
								
								
									
										12
									
								
								boa/analysis/.idea/inspectionProfiles/Project_Default.xml
									
									
									
										generated
									
									
									
										Normal file
									
								
							
							
						
						
									
										12
									
								
								boa/analysis/.idea/inspectionProfiles/Project_Default.xml
									
									
									
										generated
									
									
									
										Normal file
									
								
							@@ -0,0 +1,12 @@
 | 
				
			|||||||
 | 
					<component name="InspectionProjectProfileManager">
 | 
				
			||||||
 | 
					  <profile version="1.0">
 | 
				
			||||||
 | 
					    <option name="myName" value="Project Default" />
 | 
				
			||||||
 | 
					    <inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
 | 
				
			||||||
 | 
					      <option name="ignoredErrors">
 | 
				
			||||||
 | 
					        <list>
 | 
				
			||||||
 | 
					          <option value="N802" />
 | 
				
			||||||
 | 
					        </list>
 | 
				
			||||||
 | 
					      </option>
 | 
				
			||||||
 | 
					    </inspection_tool>
 | 
				
			||||||
 | 
					  </profile>
 | 
				
			||||||
 | 
					</component>
 | 
				
			||||||
							
								
								
									
										6
									
								
								boa/analysis/.idea/inspectionProfiles/profiles_settings.xml
									
									
									
										generated
									
									
									
										Normal file
									
								
							
							
						
						
									
										6
									
								
								boa/analysis/.idea/inspectionProfiles/profiles_settings.xml
									
									
									
										generated
									
									
									
										Normal file
									
								
							@@ -0,0 +1,6 @@
 | 
				
			|||||||
 | 
					<component name="InspectionProjectProfileManager">
 | 
				
			||||||
 | 
					  <settings>
 | 
				
			||||||
 | 
					    <option name="USE_PROJECT_PROFILE" value="false" />
 | 
				
			||||||
 | 
					    <version value="1.0" />
 | 
				
			||||||
 | 
					  </settings>
 | 
				
			||||||
 | 
					</component>
 | 
				
			||||||
							
								
								
									
										6
									
								
								boa/analysis/.idea/misc.xml
									
									
									
										generated
									
									
									
										Normal file
									
								
							
							
						
						
									
										6
									
								
								boa/analysis/.idea/misc.xml
									
									
									
										generated
									
									
									
										Normal file
									
								
							@@ -0,0 +1,6 @@
 | 
				
			|||||||
 | 
					<?xml version="1.0" encoding="UTF-8"?>
 | 
				
			||||||
 | 
					<project version="4">
 | 
				
			||||||
 | 
					  <component name="Black">
 | 
				
			||||||
 | 
					    <option name="sdkName" value="Python 3.13 (analysis)" />
 | 
				
			||||||
 | 
					  </component>
 | 
				
			||||||
 | 
					</project>
 | 
				
			||||||
							
								
								
									
										8
									
								
								boa/analysis/.idea/modules.xml
									
									
									
										generated
									
									
									
										Normal file
									
								
							
							
						
						
									
										8
									
								
								boa/analysis/.idea/modules.xml
									
									
									
										generated
									
									
									
										Normal file
									
								
							@@ -0,0 +1,8 @@
 | 
				
			|||||||
 | 
					<?xml version="1.0" encoding="UTF-8"?>
 | 
				
			||||||
 | 
					<project version="4">
 | 
				
			||||||
 | 
					  <component name="ProjectModuleManager">
 | 
				
			||||||
 | 
					    <modules>
 | 
				
			||||||
 | 
					      <module fileurl="file://$PROJECT_DIR$/.idea/analysis.iml" filepath="$PROJECT_DIR$/.idea/analysis.iml" />
 | 
				
			||||||
 | 
					    </modules>
 | 
				
			||||||
 | 
					  </component>
 | 
				
			||||||
 | 
					</project>
 | 
				
			||||||
							
								
								
									
										6
									
								
								boa/analysis/.idea/vcs.xml
									
									
									
										generated
									
									
									
										Normal file
									
								
							
							
						
						
									
										6
									
								
								boa/analysis/.idea/vcs.xml
									
									
									
										generated
									
									
									
										Normal file
									
								
							@@ -0,0 +1,6 @@
 | 
				
			|||||||
 | 
					<?xml version="1.0" encoding="UTF-8"?>
 | 
				
			||||||
 | 
					<project version="4">
 | 
				
			||||||
 | 
					  <component name="VcsDirectoryMappings">
 | 
				
			||||||
 | 
					    <mapping directory="$PROJECT_DIR$/../.." vcs="Git" />
 | 
				
			||||||
 | 
					  </component>
 | 
				
			||||||
 | 
					</project>
 | 
				
			||||||
							
								
								
									
										129
									
								
								boa/analysis/aufgabe_02.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										129
									
								
								boa/analysis/aufgabe_02.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,129 @@
 | 
				
			|||||||
 | 
					import math
 | 
				
			||||||
 | 
					import time
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import pandas as pd
 | 
				
			||||||
 | 
					import scipy.stats as stats
 | 
				
			||||||
 | 
					import matplotlib.pyplot as plt
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import matplotlib
 | 
				
			||||||
 | 
					matplotlib.use('TkAgg')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					JAVA_INPUT = './data/try_Java_Jan_2022_last_revision.boa.output.txt'
 | 
				
			||||||
 | 
					PYTHON_INPUT = './data/try_Python_Feb_2022_last_revision.boa.output.txt'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Passed by main() as the `show` argument of read_sample(); when true,
					# read_sample() prints DataFrame.info() and displays its plots.
					SHOW_PLOTS = False
 | 
				
			||||||
 | 
					# Used as `random_state` for DataFrame.sample() in read_sample(), so that
					# repeated runs draw the same rows.
					SEED = 3
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def read_sample(file_path, title='', sample_size=0, show=True):
					    """Load a Boa output file into a DataFrame, optionally sampling and plotting it.

					    Every input line is split on ``[`` and ``] =`` into the columns
					    Variable / Project / Ratio; only Project and Ratio are kept.

					    Args:
					        file_path: Location of the file handed to ``pandas.read_csv``
					            (this may also be a URL such as "https://.../example.csv").
					        title: Title placed on the diagnostic plots.
					        sample_size: Number of rows to draw at random. Values <= 0 keep
					            every row.
					        show: When true, print ``DataFrame.info()`` and display a box plot
					            and a histogram of the Ratio column.

					    Returns:
					        A DataFrame with the columns Project and Ratio.
					    """
					    df = pd.read_csv(
					        file_path,
					        sep=r"\[|\]\s=",
					        engine="python",
					        index_col=False,
					        # nrows=25,  # read only a handful of rows while testing
					        skipinitialspace=True,
					        names=['Variable', 'Project', 'Ratio'],
					        usecols=['Project', 'Ratio'],
					    )

					    if sample_size > 0:
					        # Fixed seed so that repeated runs draw the same rows and stay comparable.
					        df = df.sample(sample_size, random_state=int(SEED))

					    if show:
					        df.info()
					        # The box plot and the histogram end up on separate figures. A single
					        # plt.title() call after both only labels the histogram, so the box
					        # plot is titled through its own Axes object.
					        box_axes = df.boxplot(column=['Ratio'], grid=False)
					        box_axes.set_title(title)
					        df.hist(column=['Ratio'], grid=False)
					        plt.title(title)
					        plt.show()

					    return df
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def cohen_d(x, y):
					    """Return Cohen's d for two samples, based on the pooled standard deviation.

					    Both arguments must provide ``mean()``, ``var(ddof=...)`` and ``len()``.
					    The result is (mean(x) - mean(y)) divided by the pooled standard
					    deviation, where each sample variance (ddof=1) is weighted by its
					    degrees of freedom.
					    """
					    n_x = len(x)
					    n_y = len(y)

					    # Difference of the group means (numerator of d).
					    delta = x.mean() - y.mean()

					    # Sum of the unbiased group variances, each weighted by n - 1.
					    weighted_variances = (n_x - 1) * x.var(ddof=1) + (n_y - 1) * y.var(ddof=1)

					    # Pooled standard deviation (denominator of d).
					    pooled_std = math.sqrt(weighted_variances / (n_x + n_y - 2))

					    return delta / pooled_std
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def find_barrier(x_sample_path, y_sample_path, alpha=0.1, lower_limit=2, upper_limit=10000):
					    """Binary-search the largest sample size whose Mann-Whitney U test is not significant.

					    For a candidate size, the same number of rows is drawn from both files
					    (seeded with SEED) and a two-sided Mann-Whitney U test is run on the
					    Ratio columns. A size counts as "not significant" when p >= alpha.

					    NOTE(review): a binary search only finds a true threshold if the
					    p-value falls monotonically as the sample size grows; that is not
					    guaranteed for random samples, so treat the result as an estimate.

					    Args:
					        x_sample_path: File with the first group, readable by read_sample().
					        y_sample_path: File with the second group, readable by read_sample().
					        alpha: Significance level.
					        lower_limit: Smallest sample size to try (values below 1 are raised to 1).
					        upper_limit: Largest sample size to try (capped at the number of
					            rows available in the smaller file).

					    Returns:
					        The largest tested size with p >= alpha, or -1 when every tested
					        size was significant (or the search range was empty).
					    """
					    # Parse each file exactly once; re-reading them on every search step
					    # repeats the expensive regex-based parse without changing the samples.
					    x_all = read_sample(x_sample_path, show=False)
					    y_all = read_sample(y_sample_path, show=False)
					    max_size = min(len(x_all), len(y_all))

					    def p_value_for(size):
					        # Same draw as read_sample(..., sample_size=size): identical seed and frame.
					        x_sample = x_all.sample(size, random_state=int(SEED))["Ratio"]
					        y_sample = y_all.sample(size, random_state=int(SEED))["Ratio"]
					        _, p_value = stats.mannwhitneyu(x_sample, y_sample, alternative="two-sided")
					        return p_value

					    # Keep the search inside sizes that can actually be sampled.
					    left = max(lower_limit, 1)
					    right = min(upper_limit, max_size)
					    barrier_size = -1

					    # Binary search for the barrier.
					    while left <= right:
					        mid = (left + right) // 2

					        if p_value_for(mid) >= alpha:
					            # Not significant: remember this size and try larger samples.
					            barrier_size = mid
					            left = mid + 1
					        else:
					            # Significant: try smaller samples.
					            right = mid - 1

					    if barrier_size < 0:
					        # Nothing was found. Sizes 0 and -1 must not be "verified": read_sample()
					        # would interpret them as "use all rows" and report misleading results.
					        print(f"\nKeine nicht signifikante Stichprobengröße im Bereich [{lower_limit}, {upper_limit}] gefunden.")
					        return barrier_size

					    # Sanity check: report significance just above and exactly at the barrier.
					    if barrier_size + 1 <= max_size:
					        p_above = p_value_for(barrier_size + 1)
					        print(f"\nSind die Unterschiede bei {barrier_size + 1} Proben signifikant? {str(p_above < alpha)}")

					    p_at = p_value_for(barrier_size)
					    print(f"Sind die Unterschiede bei {barrier_size} Proben signifikant? {str(p_at < alpha)}")
					    return barrier_size
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def main():
					    """Run the exercise: load both samples, print their statistics, test them, find the barrier."""
					    plt.close('all')
					    print('Statistische Berechnungen zu Häufigkeiten (Übung 5)')

					    def load_and_report(announcement, path, plot_title):
					        # Announce, draw 1000 rows, then print mean and variance of Ratio.
					        print(announcement)
					        sample = read_sample(path, sample_size=1000, show=SHOW_PLOTS, title=plot_title)
					        ratios = sample['Ratio']
					        print('Mean:' + str(ratios.mean()))
					        print('Variance:' + str(ratios.var()))
					        return sample

					    python_sample = load_and_report('\nEinlesen der ersten Stichprobe (Python)', PYTHON_INPUT, "Python")
					    java_sample = load_and_report('\nEinlesen der zweiten Stichprobe (Java)', JAVA_INPUT, "Java")

					    print('\nStatistische Tests')
					    # Solution of the exercise starts here.
					    python_ratios = python_sample['Ratio']
					    java_ratios = java_sample['Ratio']

					    # Two-sided Mann-Whitney U test plus Cohen's d as the effect size.
					    stat, p_value = stats.mannwhitneyu(python_ratios, java_ratios, alternative='two-sided')
					    effect_size = cohen_d(python_ratios, java_ratios)

					    print(f"Globaler Durchschnitt Python: {python_ratios.mean()}")
					    print(f"Globaler Durchschnitt Java: {java_ratios.mean()}")

					    print(f"Mann-Whitney-Test: Statistik {stat}, P-Wert {p_value}")
					    print(f"Effektstärke (Cohen's d): {effect_size}")

					    # Experimental determination of the barrier.
					    barrier = find_barrier(PYTHON_INPUT, JAVA_INPUT, alpha=0.01)
					    print(f"Untere Schranke, ab welcher der Test nicht signifikant ist: {barrier}")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Press the green button in the gutter to run the script.
 | 
				
			||||||
 | 
					if __name__ == '__main__':
 | 
				
			||||||
 | 
					    main()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							
								
								
									
										91403
									
								
								boa/analysis/data/try_Java_Jan_2022_last_revision.boa.output.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										91403
									
								
								boa/analysis/data/try_Java_Jan_2022_last_revision.boa.output.txt
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										102423
									
								
								boa/analysis/data/try_Python_Feb_2022_last_revision.boa.output.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										102423
									
								
								boa/analysis/data/try_Python_Feb_2022_last_revision.boa.output.txt
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										9
									
								
								boa/analysis/test.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										9
									
								
								boa/analysis/test.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,9 @@
 | 
				
			|||||||
 | 
					import random

					# Seed the generator so the six draws below are reproducible.
					random.seed(1)
					for _ in range(6):
					    print(random.randint(0, 10))
 | 
				
			||||||
@@ -1,5 +1,5 @@
 | 
				
			|||||||
# Java-Job: 111128
 | 
					# Java-Job: 111694
 | 
				
			||||||
# Python-Job: 111124
 | 
					# Python-Job: 111439
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Includes all revisions
 | 
					# Includes all revisions
 | 
				
			||||||
p: Project = input;
 | 
					p: Project = input;
 | 
				
			||||||
@@ -12,7 +12,7 @@ cur_date: time;
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
statement_counter := visitor {
 | 
					statement_counter := visitor {
 | 
				
			||||||
    before node: Statement -> {
 | 
					    before node: Statement -> {
 | 
				
			||||||
        if(node.kind == StatementKind.TRY or node.kind == StatementKind.WITH)
 | 
					        if (def(node.kind) and (node.kind == StatementKind.TRY or node.kind == StatementKind.WITH))
 | 
				
			||||||
            relative_list[p.name][yearof(cur_date)] << 1;
 | 
					            relative_list[p.name][yearof(cur_date)] << 1;
 | 
				
			||||||
        else
 | 
					        else
 | 
				
			||||||
            relative_list[p.name][yearof(cur_date)] << 0;
 | 
					            relative_list[p.name][yearof(cur_date)] << 0;
 | 
				
			||||||
@@ -22,12 +22,11 @@ statement_counter := visitor {
 | 
				
			|||||||
visit(p, visitor {
 | 
					visit(p, visitor {
 | 
				
			||||||
    before node: CodeRepository -> {
 | 
					    before node: CodeRepository -> {
 | 
				
			||||||
        for (minus_year: int=22; minus_year >= 0; minus_year--) {
 | 
					        for (minus_year: int=22; minus_year >= 0; minus_year--) {
 | 
				
			||||||
            cur_date = addyear(now(), -minus_year);
 | 
					            cur_date = addyear(T"Dec 31, 2022, 10:00:00 AM", -minus_year);
 | 
				
			||||||
            snapshot := getsnapshot(node, cur_date);
 | 
					            snapshot := getsnapshot(node, cur_date);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            if (def(snapshot))
 | 
					            foreach (i: int; def(snapshot[i]))
 | 
				
			||||||
                foreach (i: int; def(snapshot[i]))
 | 
					                visit(snapshot[i], statement_counter);
 | 
				
			||||||
                    visit(snapshot[i], statement_counter);
 | 
					 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
});
 | 
					});
 | 
				
			||||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
		Reference in New Issue
	
	Block a user