add llm chapter

This commit is contained in:
Jan-Niclas Loosen
2025-01-30 11:12:08 +01:00
parent bd30888c3f
commit ce17502029
22 changed files with 443 additions and 26 deletions

View File

@@ -8,11 +8,14 @@ import matplotlib.pyplot as plt
import matplotlib
matplotlib.use('TkAgg')
JAVA_INPUT = './data/try_Java_Jan_2022_last_revision.boa.output.txt'
PYTHON_INPUT = './data/try_Python_Feb_2022_last_revision.boa.output.txt'
# Input paths
JAVA_INPUT = './boa-output/try_Java_Jan_2022_last_revision.boa.output.txt'
PYTHON_INPUT = './boa-output/try_Python_Feb_2022_last_revision.boa.output.txt'
SHOW_PLOTS = False
SEED = 3
# Configuration for read_sample
SEED = 1
SHOW_PLOTS = True
SAMPLES = 1000
def read_sample(file_path, title='', sample_size=0, show=True):
# file_path may also be a URL: "https://..../example.csv"
@@ -50,12 +53,11 @@ def cohen_d(x, y):
# Stichprobengrößen der Gruppen
size_x, size_y = len(x), len(y)
# Gepoolte Varianz und Standardabweichung
pool_var = ((size_x - 1) * var_x + (size_y - 1) * var_y) / (size_x + size_y - 2)
pool_var = math.sqrt(pool_var)
# Gepoolte Standardabweichung
pool_std = math.sqrt(((size_x - 1) * var_x + (size_y - 1) * var_y) / (size_x + size_y - 2))
# Cohen's d
d_val = mean_diff / pool_var
d_val = mean_diff / pool_std
return d_val
def find_barrier(x_sample_path, y_sample_path, alpha=0.1, lower_limit=2, upper_limit=10000):
@@ -95,12 +97,12 @@ def main():
print('Statistische Berechnungen zu Häufigkeiten (Übung 5)')
print('\nEinlesen der ersten Stichprobe (Python)')
python_sample = read_sample(PYTHON_INPUT, sample_size=1000, show=SHOW_PLOTS, title="Python")
python_sample = read_sample(PYTHON_INPUT, sample_size=SAMPLES, show=SHOW_PLOTS, title="Python")
print('Mean:' + str(python_sample['Ratio'].mean()))
print('Variance:' + str(python_sample['Ratio'].var()))
print('\nEinlesen der zweiten Stichprobe (Java)')
java_sample = read_sample(JAVA_INPUT, sample_size=1000, show=SHOW_PLOTS, title="Java")
java_sample = read_sample(JAVA_INPUT, sample_size=SAMPLES, show=SHOW_PLOTS, title="Java")
print('Mean:' + str(java_sample['Ratio'].mean()))
print('Variance:' + str(java_sample['Ratio'].var()))

View File

@@ -1,9 +0,0 @@
import random
random.seed(1)
print(random.randint(0, 10))
print(random.randint(0, 10))
print(random.randint(0, 10))
print(random.randint(0, 10))
print(random.randint(0, 10))
print(random.randint(0, 10))

View File

@@ -1,5 +1,5 @@
# Java-Job: 111005
# Python-Job: 111006
# Java-Job: 111839
# Python-Job: 111840
# Includes all revisions
p: Project = input;

View File

@@ -1,5 +1,5 @@
# Java-Job: 111694
# Python-Job: 111439
# Java-Job: Throws empty error on the "2022 Jan/Java" dataset.
# Python-Job: 111838
# Includes all revisions
p: Project = input;
@@ -12,7 +12,7 @@ cur_date: time;
statement_counter := visitor {
before node: Statement -> {
if (def(node.kind) and (node.kind == StatementKind.TRY or node.kind == StatementKind.WITH))
if (node.kind == StatementKind.TRY or node.kind == StatementKind.WITH)
relative_list[p.name][yearof(cur_date)] << 1;
else
relative_list[p.name][yearof(cur_date)] << 0;
@@ -21,8 +21,8 @@ statement_counter := visitor {
visit(p, visitor {
before node: CodeRepository -> {
for (minus_year: int=22; minus_year >= 0; minus_year--) {
cur_date = addyear(T"Dec 31, 2022, 10:00:00 AM", -minus_year);
for (minus_year: int=19; minus_year >= 0; minus_year--) {
cur_date = addyear(T"Dec 31, 2019, 10:00:00 AM", -minus_year);
snapshot := getsnapshot(node, cur_date);
foreach (i: int; def(snapshot[i]))