continue homework
This commit is contained in:
parent
38709f0de0
commit
bd30888c3f
8
boa/analysis/.idea/.gitignore
vendored
Normal file
8
boa/analysis/.idea/.gitignore
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
# Default ignored files
|
||||
/shelf/
|
||||
/workspace.xml
|
||||
# Editor-based HTTP Client requests
|
||||
/httpRequests/
|
||||
# Datasource local storage ignored files
|
||||
/dataSources/
|
||||
/dataSources.local.xml
|
10
boa/analysis/.idea/analysis.iml
Normal file
10
boa/analysis/.idea/analysis.iml
Normal file
@ -0,0 +1,10 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="PYTHON_MODULE" version="4">
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$">
|
||||
<excludeFolder url="file://$MODULE_DIR$/.venv" />
|
||||
</content>
|
||||
<orderEntry type="jdk" jdkName="Python 3.13 (analysis)" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
</module>
|
12
boa/analysis/.idea/inspectionProfiles/Project_Default.xml
Normal file
12
boa/analysis/.idea/inspectionProfiles/Project_Default.xml
Normal file
@ -0,0 +1,12 @@
|
||||
<component name="InspectionProjectProfileManager">
|
||||
<profile version="1.0">
|
||||
<option name="myName" value="Project Default" />
|
||||
<inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
|
||||
<option name="ignoredErrors">
|
||||
<list>
|
||||
<option value="N802" />
|
||||
</list>
|
||||
</option>
|
||||
</inspection_tool>
|
||||
</profile>
|
||||
</component>
|
@ -0,0 +1,6 @@
|
||||
<component name="InspectionProjectProfileManager">
|
||||
<settings>
|
||||
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||
<version value="1.0" />
|
||||
</settings>
|
||||
</component>
|
6
boa/analysis/.idea/misc.xml
Normal file
6
boa/analysis/.idea/misc.xml
Normal file
@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="Black">
|
||||
<option name="sdkName" value="Python 3.13 (analysis)" />
|
||||
</component>
|
||||
</project>
|
8
boa/analysis/.idea/modules.xml
Normal file
8
boa/analysis/.idea/modules.xml
Normal file
@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/analysis.iml" filepath="$PROJECT_DIR$/.idea/analysis.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
</project>
|
6
boa/analysis/.idea/vcs.xml
Normal file
6
boa/analysis/.idea/vcs.xml
Normal file
@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="$PROJECT_DIR$/../.." vcs="Git" />
|
||||
</component>
|
||||
</project>
|
129
boa/analysis/aufgabe_02.py
Normal file
129
boa/analysis/aufgabe_02.py
Normal file
@ -0,0 +1,129 @@
|
||||
import math
|
||||
import time
|
||||
|
||||
import pandas as pd
|
||||
import scipy.stats as stats
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
import matplotlib
|
||||
matplotlib.use('TkAgg')
|
||||
|
||||
JAVA_INPUT = './data/try_Java_Jan_2022_last_revision.boa.output.txt'
|
||||
PYTHON_INPUT = './data/try_Python_Feb_2022_last_revision.boa.output.txt'
|
||||
|
||||
SHOW_PLOTS = False
|
||||
SEED = 3
|
||||
|
||||
def read_sample(file_path, title='', sample_size=0, show=True):
    """Load a Boa output file into a DataFrame, optionally subsample and plot it.

    Each input line is split with the regular expression ``\\[|\\]\\s=`` into
    three fields named ``Variable``, ``Project`` and ``Ratio``; only
    ``Project`` and ``Ratio`` are kept.

    Args:
        file_path: Location of the data, passed straight to ``pd.read_csv``
            (so it may also be a URL such as "https://..../example.csv").
        title: Title passed to ``plt.title`` when ``show`` is true.
        sample_size: If greater than 0, return a random sample of that many
            rows drawn with the module-level ``SEED``; otherwise return
            every row.
        show: If true, print ``DataFrame.info()`` and display a box plot and
            a histogram of the ``Ratio`` column.

    Returns:
        The (possibly subsampled) DataFrame with columns ``Project`` and
        ``Ratio``.
    """
    # Tip for testing: add nrows=25 to read only a small number of lines.
    csv_options = dict(
        sep=r"\[|\]\s=",
        engine="python",
        index_col=False,
        skipinitialspace=True,
        names=['Variable', 'Project', 'Ratio'],
        usecols=['Project', 'Ratio'],
    )
    frame = pd.read_csv(file_path, **csv_options)

    if sample_size > 0:
        # Fixed seed so that runs can be compared with each other.
        frame = frame.sample(sample_size, random_state=int(SEED))

    if not show:
        return frame

    frame.info()
    frame.boxplot(column=['Ratio'], grid=False)
    frame.hist(column=['Ratio'], grid=False)
    plt.title(title)
    plt.show()
    return frame
|
||||
|
||||
def cohen_d(x, y):
    """Return Cohen's d for two samples, based on the pooled standard deviation.

    Args:
        x: First sample; must provide ``mean()``, ``var(ddof=...)`` and
            ``len()`` (e.g. a pandas Series).
        y: Second sample, same requirements as ``x``.

    Returns:
        ``(mean(x) - mean(y)) / pooled_std``, where the pooled variance
        weights each sample variance (``ddof=1``) by its degrees of freedom.
    """
    n_x, n_y = len(x), len(y)

    # Sample variances weighted by their degrees of freedom.
    weighted_var_sum = (n_x - 1) * x.var(ddof=1) + (n_y - 1) * y.var(ddof=1)
    pooled_std = math.sqrt(weighted_var_sum / (n_x + n_y - 2))

    return (x.mean() - y.mean()) / pooled_std
|
||||
|
||||
def find_barrier(x_sample_path, y_sample_path, alpha=0.1, lower_limit=2, upper_limit=10000):
    """Search for the largest sample size whose Mann-Whitney U test is not significant.

    A binary search over the sample size is performed. For every probed size
    both data sets are subsampled with the module-level ``SEED`` and compared
    with a two-sided Mann-Whitney U test. A size counts as "not significant"
    when ``p_value >= alpha``. The search treats significance as monotonic in
    the sample size, which random subsampling does not guarantee, so the
    result is an experimental estimate.

    Both files are parsed only once; the subsamples are drawn from the loaded
    frames exactly the way ``read_sample`` draws them, so the results are the
    same as when re-reading the files for every probe.

    Args:
        x_sample_path: Path of the first data file (read via ``read_sample``).
        y_sample_path: Path of the second data file (read via ``read_sample``).
        alpha: Significance level.
        lower_limit: Smallest sample size to probe (values below 1 are
            raised to 1).
        upper_limit: Largest sample size to probe (capped at the number of
            rows of the smaller data set).

    Returns:
        The largest probed sample size with a non-significant result, or -1
        if every probed size was significant.
    """
    # sample_size defaults to 0, which makes read_sample return every row.
    x_frame = read_sample(x_sample_path, show=False)
    y_frame = read_sample(y_sample_path, show=False)
    max_size = min(len(x_frame), len(y_frame))

    def p_value_for(size):
        # Same call as in read_sample, hence the same rows for a given size.
        x_sample = x_frame.sample(size, random_state=int(SEED))["Ratio"]
        y_sample = y_frame.sample(size, random_state=int(SEED))["Ratio"]
        _, p_value = stats.mannwhitneyu(x_sample, y_sample, alternative="two-sided")
        return p_value

    # Keep the search range inside what can actually be sampled.
    left = max(lower_limit, 1)
    right = min(upper_limit, max_size)
    barrier_size = -1

    # Binary search for the barrier
    while left <= right:
        mid = (left + right) // 2

        if p_value_for(mid) >= alpha:
            # Not significant: remember this size and try a larger sample.
            barrier_size = mid
            left = mid + 1
        else:
            # Significant: try a smaller sample.
            right = mid - 1

    if barrier_size < 0:
        # Without a barrier the follow-up checks below would be meaningless.
        print("\nKeine Stichprobengröße ohne signifikanten Unterschied gefunden.")
        return barrier_size

    # Report the result directly above and at the barrier as a sanity check.
    if barrier_size < max_size:
        print(f"\nSind die Unterschiede bei {barrier_size + 1} Proben signifikant? {str(p_value_for(barrier_size + 1) < alpha)}")
    print(f"Sind die Unterschiede bei {barrier_size} Proben signifikant? {str(p_value_for(barrier_size) < alpha)}")
    return barrier_size
|
||||
|
||||
def main():
    """Run the analysis: load both samples, print statistics and test results.

    Reads 1000-row samples of the Python and Java data sets, prints mean and
    variance of their ``Ratio`` columns, runs a two-sided Mann-Whitney U test,
    prints Cohen's d, and finally prints the sample-size barrier determined
    by ``find_barrier`` with ``alpha=0.01``.
    """
    plt.close('all')
    print('Statistische Berechnungen zu Häufigkeiten (Übung 5)')

    print('\nEinlesen der ersten Stichprobe (Python)')
    python_sample = read_sample(PYTHON_INPUT, sample_size=1000, show=SHOW_PLOTS, title="Python")
    python_ratio = python_sample['Ratio']
    print('Mean:', python_ratio.mean(), sep='')
    print('Variance:', python_ratio.var(), sep='')

    print('\nEinlesen der zweiten Stichprobe (Java)')
    java_sample = read_sample(JAVA_INPUT, sample_size=1000, show=SHOW_PLOTS, title="Java")
    java_ratio = java_sample['Ratio']
    print('Mean:', java_ratio.mean(), sep='')
    print('Variance:', java_ratio.var(), sep='')

    print('\nStatistische Tests')
    # Exercise solution starts here

    # Mann-Whitney U test and effect size on the two samples
    test_result = stats.mannwhitneyu(python_ratio, java_ratio, alternative='two-sided')
    stat, p_value = test_result
    effect_size = cohen_d(python_ratio, java_ratio)

    print(f"Globaler Durchschnitt Python: {python_ratio.mean()}")
    print(f"Globaler Durchschnitt Java: {java_ratio.mean()}")

    print(f"Mann-Whitney-Test: Statistik {stat}, P-Wert {p_value}")
    print(f"Effektstärke (Cohen's d): {effect_size}")

    # Experimental determination of the barrier
    barrier = find_barrier(PYTHON_INPUT, JAVA_INPUT, alpha=0.01)
    print(f"Untere Schranke, ab welcher der Test nicht signifikant ist: {barrier}")
|
||||
|
||||
# Press the green button in the gutter to run the script.
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
||||
|
||||
|
91403
boa/analysis/data/try_Java_Jan_2022_last_revision.boa.output.txt
Normal file
91403
boa/analysis/data/try_Java_Jan_2022_last_revision.boa.output.txt
Normal file
File diff suppressed because it is too large
Load Diff
102423
boa/analysis/data/try_Python_Feb_2022_last_revision.boa.output.txt
Normal file
102423
boa/analysis/data/try_Python_Feb_2022_last_revision.boa.output.txt
Normal file
File diff suppressed because it is too large
Load Diff
9
boa/analysis/test.py
Normal file
9
boa/analysis/test.py
Normal file
@ -0,0 +1,9 @@
|
||||
import random
|
||||
|
||||
# Fixed seed so the six draws below are the same on every run.
random.seed(1)

for _ in range(6):
    print(random.randint(0, 10))
|
@ -1,5 +1,5 @@
|
||||
# Java-Job: 111128
|
||||
# Python-Job: 111124
|
||||
# Java-Job: 111694
|
||||
# Python-Job: 111439
|
||||
|
||||
# Includes all revisions
|
||||
p: Project = input;
|
||||
@ -12,7 +12,7 @@ cur_date: time;
|
||||
|
||||
statement_counter := visitor {
|
||||
before node: Statement -> {
|
||||
if(node.kind == StatementKind.TRY or node.kind == StatementKind.WITH)
|
||||
if (def(node.kind) and (node.kind == StatementKind.TRY or node.kind == StatementKind.WITH))
|
||||
relative_list[p.name][yearof(cur_date)] << 1;
|
||||
else
|
||||
relative_list[p.name][yearof(cur_date)] << 0;
|
||||
@ -22,12 +22,11 @@ statement_counter := visitor {
|
||||
visit(p, visitor {
|
||||
before node: CodeRepository -> {
|
||||
for (minus_year: int=22; minus_year >= 0; minus_year--) {
|
||||
cur_date = addyear(now(), -minus_year);
|
||||
cur_date = addyear(T"Dec 31, 2022, 10:00:00 AM", -minus_year);
|
||||
snapshot := getsnapshot(node, cur_date);
|
||||
|
||||
if (def(snapshot))
|
||||
foreach (i: int; def(snapshot[i]))
|
||||
visit(snapshot[i], statement_counter);
|
||||
foreach (i: int; def(snapshot[i]))
|
||||
visit(snapshot[i], statement_counter);
|
||||
}
|
||||
}
|
||||
});
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user