add llm chapter
This commit is contained in:
@@ -8,11 +8,14 @@ import matplotlib.pyplot as plt
|
||||
import matplotlib
|
||||
matplotlib.use('TkAgg')
|
||||
|
||||
JAVA_INPUT = './data/try_Java_Jan_2022_last_revision.boa.output.txt'
|
||||
PYTHON_INPUT = './data/try_Python_Feb_2022_last_revision.boa.output.txt'
|
||||
# Input paths
|
||||
JAVA_INPUT = './boa-output/try_Java_Jan_2022_last_revision.boa.output.txt'
|
||||
PYTHON_INPUT = './boa-output/try_Python_Feb_2022_last_revision.boa.output.txt'
|
||||
|
||||
SHOW_PLOTS = False
|
||||
SEED = 3
|
||||
# Konfiguration von read_sample
|
||||
SEED = 1
|
||||
SHOW_PLOTS = True
|
||||
SAMPLES = 1000
|
||||
|
||||
def read_sample(file_path, title='', sample_size=0, show=True):
|
||||
# file_path may also be a URL: "https://..../example.csv"
|
||||
@@ -50,12 +53,11 @@ def cohen_d(x, y):
|
||||
# Stichprobengrößen der Gruppen
|
||||
size_x, size_y = len(x), len(y)
|
||||
|
||||
# Gepoolte Varianz und Standardabweichung
|
||||
pool_var = ((size_x - 1) * var_x + (size_y - 1) * var_y) / (size_x + size_y - 2)
|
||||
pool_var = math.sqrt(pool_var)
|
||||
# Gepoolte Standardabweichung
|
||||
pool_std = math.sqrt(((size_x - 1) * var_x + (size_y - 1) * var_y) / (size_x + size_y - 2))
|
||||
|
||||
# Cohen's d
|
||||
d_val = mean_diff / pool_var
|
||||
d_val = mean_diff / pool_std
|
||||
return d_val
|
||||
|
||||
def find_barrier(x_sample_path, y_sample_path, alpha=0.1, lower_limit=2, upper_limit=10000):
|
||||
@@ -95,12 +97,12 @@ def main():
|
||||
print('Statistische Berechnungen zu Häufigkeiten (Übung 5)')
|
||||
|
||||
print('\nEinlesen der ersten Stichprobe (Python)')
|
||||
python_sample = read_sample(PYTHON_INPUT, sample_size=1000, show=SHOW_PLOTS, title="Python")
|
||||
python_sample = read_sample(PYTHON_INPUT, sample_size=SAMPLES, show=SHOW_PLOTS, title="Python")
|
||||
print('Mean:' + str(python_sample['Ratio'].mean()))
|
||||
print('Variance:' + str(python_sample['Ratio'].var()))
|
||||
|
||||
print('\nEinlesen der zweiten Stichprobe (Java)')
|
||||
java_sample = read_sample(JAVA_INPUT, sample_size=1000, show=SHOW_PLOTS, title="Java")
|
||||
java_sample = read_sample(JAVA_INPUT, sample_size=SAMPLES, show=SHOW_PLOTS, title="Java")
|
||||
print('Mean:' + str(java_sample['Ratio'].mean()))
|
||||
print('Variance:' + str(java_sample['Ratio'].var()))
|
||||
|
||||
|
@@ -1,9 +0,0 @@
|
||||
import random
|
||||
|
||||
random.seed(1)
|
||||
print(random.randint(0, 10))
|
||||
print(random.randint(0, 10))
|
||||
print(random.randint(0, 10))
|
||||
print(random.randint(0, 10))
|
||||
print(random.randint(0, 10))
|
||||
print(random.randint(0, 10))
|
@@ -1,5 +1,5 @@
|
||||
# Java-Job: 111005
|
||||
# Python-Job: 111006
|
||||
# Java-Job: 111839
|
||||
# Python-Job: 111840
|
||||
|
||||
# Includes all revisions
|
||||
p: Project = input;
|
||||
|
@@ -1,5 +1,5 @@
|
||||
# Java-Job: 111694
|
||||
# Python-Job: 111439
|
||||
# Java-Job: Throws empty error on the "2022 Jan/Java" dataset.
|
||||
# Python-Job: 111838
|
||||
|
||||
# Includes all revisions
|
||||
p: Project = input;
|
||||
@@ -12,7 +12,7 @@ cur_date: time;
|
||||
|
||||
statement_counter := visitor {
|
||||
before node: Statement -> {
|
||||
if (def(node.kind) and (node.kind == StatementKind.TRY or node.kind == StatementKind.WITH))
|
||||
if (node.kind == StatementKind.TRY or node.kind == StatementKind.WITH)
|
||||
relative_list[p.name][yearof(cur_date)] << 1;
|
||||
else
|
||||
relative_list[p.name][yearof(cur_date)] << 0;
|
||||
@@ -21,8 +21,8 @@ statement_counter := visitor {
|
||||
|
||||
visit(p, visitor {
|
||||
before node: CodeRepository -> {
|
||||
for (minus_year: int=22; minus_year >= 0; minus_year--) {
|
||||
cur_date = addyear(T"Dec 31, 2022, 10:00:00 AM", -minus_year);
|
||||
for (minus_year: int=19; minus_year >= 0; minus_year--) {
|
||||
cur_date = addyear(T"Dec 31, 2019, 10:00:00 AM", -minus_year);
|
||||
snapshot := getsnapshot(node, cur_date);
|
||||
|
||||
foreach (i: int; def(snapshot[i]))
|
||||
|
Reference in New Issue
Block a user